module Masscan::Parsers::Binary

Parses the `masscan -oB` output format.

@note Ported from github.com/robertdavidgraham/masscan/blob/1.3.2/src/in-binary.c

@api semipublic

Constants

APP_PROTOCOLS

List of application protocol keywords.

BUF_MAX

Maximum buffer length for a single record.

IP_PROTOCOLS

Mapping of IP protocol numbers to keywords.

MASSCAN_MAGIC

The `masscan` binary format magic string.

MASSCAN_VERSION_FAMILY

Masscan binary format version compatibility.

PSEUDO_RECORD_SIZE

The length of the “pseudo record” that is read from the beginning of the file.

Public Class Methods

decode_ipv4(ip) click to toggle source

Decodes an IPv4 address from an integer.

@param [Integer] ip

The IP in raw integer form.

@return [IPAddr]

The decoded IPv4 address.
# File lib/masscan/parsers/binary.rb, line 216
#
# Builds an IPv4 address object from its raw integer form.
#
# @param [Integer] ip
#   The IP in raw integer form.
#
# @return [IPAddr]
#   The decoded IPv4 address.
#
def self.decode_ipv4(ip)
  family = Socket::AF_INET

  IPAddr.new(ip, family)
end
decode_ipv6(ipv6_hi,ipv6_lo) click to toggle source

Decodes an IPv6 address from two 64bit integers.

@param [Integer] ipv6_hi

The top-half of the 128bit IPv6 address.

@param [Integer] ipv6_lo

The bottom-half of the 128bit IPv6 address.

@return [IPAddr]

The decoded IPv6 address.
# File lib/masscan/parsers/binary.rb, line 232
#
# Builds an IPv6 address object from two 64bit integers.
#
# @param [Integer] ipv6_hi
#   The upper 64 bits of the 128bit IPv6 address.
#
# @param [Integer] ipv6_lo
#   The lower 64 bits of the 128bit IPv6 address.
#
# @return [IPAddr]
#   The decoded IPv6 address.
#
def self.decode_ipv6(ipv6_hi,ipv6_lo)
  # place the high half in bits 64..127 and the low half in bits 0..63
  address = (ipv6_hi << 64) | ipv6_lo

  IPAddr.new(address, Socket::AF_INET6)
end
decode_reason(reason) click to toggle source

Decodes a reason bitflag.

@param [Integer] reason

The reason bitflag.

@return [Array<:fin, :syn, :rst, :psh, :ack, :urg, :ece, :cwr>]

The reason flags.
# File lib/masscan/parsers/binary.rb, line 271
#
# Expands a reason bitflag into the list of flag names whose bits are set.
#
# @param [Integer] reason
#   The reason bitflag.
#
# @return [Array<:fin, :syn, :rst, :psh, :ack, :urg, :ece, :cwr>]
#   The flags that are set, ordered from the lowest bit (`0x01`, `:fin`)
#   to the highest (`0x80`, `:cwr`).
#
def self.decode_reason(reason)
  # the position in this list is the bit number of each flag
  names = [:fin, :syn, :rst, :psh, :ack, :urg, :ece, :cwr]

  set_pairs = names.each_with_index.select do |_name,bit|
    (reason & (1 << bit)) != 0
  end

  set_pairs.map(&:first)
end
decode_timestamp(timestamp) click to toggle source

Decodes a timestamp from an integer.

@param [Integer] timestamp

The raw UNIX timestamp integer.

@return [Time]

The decoded time value.
# File lib/masscan/parsers/binary.rb, line 203
#
# Converts a raw UNIX timestamp into a Time object.
#
# @param [Integer] timestamp
#   The raw UNIX timestamp integer.
#
# @return [Time]
#   The decoded time value.
#
def self.decode_timestamp(timestamp)
  decoded = Time.at(timestamp)

  decoded
end
lookup_app_protocol(proto) click to toggle source

Looks up an application protocol number.

@param [Integer] proto

The application protocol number.

@return [Symbol, nil]

The application protocol keyword.

@see APP_PROTOCOLS

# File lib/masscan/parsers/binary.rb, line 330
#
# Looks up the keyword for an application protocol number.
#
# @param [Integer] proto
#   The application protocol number.
#
# @return [Symbol, nil]
#   The entry stored in APP_PROTOCOLS for the given number.
#
# @see APP_PROTOCOLS
#
def self.lookup_app_protocol(proto)
  keyword = APP_PROTOCOLS[proto]

  keyword
end
lookup_ip_protocol(proto) click to toggle source

Looks up an IP protocol number.

@param [Integer] proto

The IP protocol number.

@return [:icmp, :tcp, :udp, :sctp, nil]

The IP protocol keyword.

@see IP_PROTOCOLS

# File lib/masscan/parsers/binary.rb, line 258
#
# Looks up the keyword for an IP protocol number.
#
# @param [Integer] proto
#   The IP protocol number.
#
# @return [:icmp, :tcp, :udp, :sctp, nil]
#   The entry stored in IP_PROTOCOLS for the given number.
#
# @see IP_PROTOCOLS
#
def self.lookup_ip_protocol(proto)
  keyword = IP_PROTOCOLS[proto]

  keyword
end
open(path,&block) click to toggle source

Opens a binary file for parsing.

@param [String] path

The path to the file.

@yield [file]

If a block is given, it will be passed the opened file.
Once the block returns, the file will be closed.

@yieldparam [File]

The opened file.

@return [File]

If no block was given, the opened file will be returned.
# File lib/masscan/parsers/binary.rb, line 32
#
# Opens a file in binary read mode (`'rb'`) for parsing.
#
# @param [String] path
#   The path to the file.
#
# @yield [file]
#   If a block is given, it is forwarded to `File.open`.
#
# @yieldparam [File] file
#   The opened file.
#
# @return [File, Object]
#   Whatever `File.open` returns: the opened file when no block is given,
#   otherwise the block's result.
#
def self.open(path,&block)
  binary_read_mode = 'rb'

  File.open(path, binary_read_mode, &block)
end
parse(io) { |record| ... } click to toggle source

Parses masscan binary data.

@param [IO] io

The IO object to read from.

@yield [record]

If a block is given, it will be passed each parsed record.

@yieldparam [Status, Banner] record

A parsed record, either a {Status} or a {Banner} object.

@return [Enumerator]

If no block is given, it will return an Enumerator.
# File lib/masscan/parsers/binary.rb, line 57
#
# Parses masscan binary data, yielding every record found in the stream.
#
# The stream starts with a "pseudo record" header, followed by a sequence of
# `TYPE, LENGTH, payload` records. Parsing stops quietly at the first
# truncated record.
#
# @param [IO] io
#   The IO object to read from.
#
# @yield [record]
#   If a block is given, it will be passed each parsed record.
#
# @yieldparam [Status, Banner] record
#   A parsed record, either a {Status} or a {Banner} object.
#
# @return [Enumerator, Integer]
#   If no block is given, an Enumerator is returned. Otherwise the number
#   of records that were yielded is returned.
#
# @raise [CorruptedFile]
#   If a record LENGTH exceeds BUF_MAX or an unknown record TYPE is read.
#
def self.parse(io)
  return enum_for(__method__,io) unless block_given?

  pseudo = read_pseudo_record(io)

  # look for the start time
  # NOTE(review): start_time is assigned here and below but never read
  # inside this method.
  if (match = pseudo.match(/s:(\d+)/))
    start_time = decode_timestamp(match[1].to_i)
  end

  total_records = 0

  # read all records
  loop do
    # read the TYPE field; nil means the stream has ended
    break unless (type = read_multibyte_uint(io))

    # read the LENGTH field; nil means the stream has ended
    break unless (length = read_multibyte_uint(io))

    if length > BUF_MAX
      raise(CorruptedFile,"file corrupted")
    end

    # read the remainder of the record
    buffer = io.read(length)

    # IO#read(length) returns nil (not "") when called at end-of-stream with
    # a positive length, so a nil buffer is a truncated record as well.
    break if buffer.nil? || buffer.length < length

    # parse the specific record type
    record = case type
             when 1 # STATUS: open
               parse_status(buffer,:open)
             when 2 # STATUS: closed
               parse_status(buffer,:closed)
             when 3 # BANNER
               parse_banner3(buffer)
             when 4
               # NOTE(review): one extra byte is consumed and discarded here;
               # confirm against the reference C implementation whether it
               # should be appended to the buffer instead.
               io.getbyte
               parse_banner4(buffer)
             when 5
               parse_banner4(buffer)
             when 6 # STATUS: open
               parse_status2(buffer,:open)
             when 7 # STATUS: closed
               parse_status2(buffer,:closed)
             when 9
               parse_banner9(buffer)
             when 10 # Open6
               parse_status6(buffer,:open)
             when 11 # Closed6
               parse_status6(buffer,:closed)
             when 13 # Banner6
               parse_banner6(buffer)
             when 109 # 'm'.ord # FILEHEADER
               next
             else
               raise(CorruptedFile,"unknown type: #{type.inspect}")
             end

    # the record parsers return nil for records that are too short
    if record
      start_time ||= record.timestamp

      yield record

      total_records += 1
    end
  end

  total_records
end
parse_banner3(buffer) click to toggle source

Parses a banner record.

@param [String] buffer

The buffer to parse.

@return [Banner]

The parsed banner record.
# File lib/masscan/parsers/binary.rb, line 389
#
# Parses a type 3 banner record: a 12 byte header (timestamp, IPv4 address,
# port, application protocol) followed by the banner payload.
#
# This record type carries no IP protocol or TTL, so `:tcp` and `0` are
# used for those fields.
#
# @param [String] buffer
#   The buffer to parse.
#
# @return [Banner, nil]
#   The parsed banner record, or `nil` if the buffer is shorter than the
#   12 byte header.
#
def self.parse_banner3(buffer)
  # 4 (timestamp) + 4 (ip) + 2 (port) + 2 (app_proto); a shorter buffer would
  # unpack to nil fields, so skip it like the other banner parsers do
  if buffer.length < 12
    return
  end

  timestamp, ip, port, app_proto, payload = buffer.unpack('L>L>S>S>A*')

  timestamp = decode_timestamp(timestamp)
  ip        = decode_ipv4(ip)
  app_proto = lookup_app_protocol(app_proto)

  # defaults
  ip_proto = :tcp
  ttl = 0

  return Banner.new(
    protocol:     ip_proto,
    port:         port,
    ttl:          ttl,
    ip:           ip,
    timestamp:    timestamp,
    app_protocol: app_proto,
    payload:      payload
  )
end
parse_banner4(buffer) click to toggle source

Parses a banner record.

@param [String] buffer

The buffer to parse.

@return [Banner, nil]

The parsed banner record, or `nil` if the buffer is too short.
# File lib/masscan/parsers/binary.rb, line 420
#
# Parses a banner record with a 13 byte header (timestamp, IPv4 address,
# IP protocol, port, application protocol) followed by the banner payload.
#
# @param [String] buffer
#   The buffer to parse.
#
# @return [Banner, nil]
#   The parsed banner record, or `nil` if the buffer is shorter than 13
#   bytes. The TTL is always `0` for this record type.
#
def self.parse_banner4(buffer)
  return if buffer.length < 13

  fields = buffer.unpack('L>L>CS>S>A*')
  raw_time, raw_ip, raw_ip_proto, port, raw_app_proto, payload = fields

  Banner.new(
    protocol:     lookup_ip_protocol(raw_ip_proto),
    port:         port,
    ttl:          0, # this record layout has no TTL field
    ip:           decode_ipv4(raw_ip),
    timestamp:    decode_timestamp(raw_time),
    app_protocol: lookup_app_protocol(raw_app_proto),
    payload:      payload
  )
end
parse_banner6(buffer) click to toggle source

Parses a banner record.

@param [String] buffer

The buffer to parse.

@return [Banner]

The parsed banner record.
# File lib/masscan/parsers/binary.rb, line 568
#
# Parses an IPv6 banner record: timestamp, IP protocol, port, application
# protocol, TTL, IP version and a 128bit address (as two 64bit halves),
# followed by the banner payload.
#
# @param [String] buffer
#   The buffer to parse.
#
# @return [Banner]
#   The parsed banner record.
#
# @raise [CorruptedFile]
#   If the record's IP version field is not 6, which includes buffers that
#   are too short to contain that field.
#
def self.parse_banner6(buffer)
  timestamp, ip_proto, port, app_proto, ttl, ip_version, ipv6_hi, ipv6_lo, payload = buffer.unpack('L>CS>S>CCQ>Q>A*')

  # fields missing from a truncated buffer unpack as nil; replace them with
  # all-ones sentinels so the ip_version check below rejects the record
  timestamp  ||= 0xffffffff
  ip_proto   ||= 0xff
  port       ||= 0xffff
  app_proto  ||= 0xffff
  ttl        ||= 0xff
  ip_version ||= 0xff
  ipv6_hi    ||= 0xffffffff_ffffffff
  ipv6_lo    ||= 0xffffffff_ffffffff

  # same validation as parse_status6 performs on its ip_version field
  unless ip_version == 6
    raise(CorruptedFile,"expected ip_version to be 6: #{ip_version.inspect}")
  end

  timestamp = decode_timestamp(timestamp)
  ip_proto  = lookup_ip_protocol(ip_proto)
  app_proto = lookup_app_protocol(app_proto)
  ipv6      = decode_ipv6(ipv6_hi,ipv6_lo)

  return Banner.new(
    protocol:     ip_proto,
    port:         port,
    ttl:          ttl,
    ip:           ipv6,
    timestamp:    timestamp,
    app_protocol: app_proto,
    payload:      payload
  )
end
parse_banner9(buffer) click to toggle source

Parses a banner record.

@param [String] buffer

The buffer to parse.

@return [Banner, nil]

The parsed banner record, or `nil` if the buffer is too short.
# File lib/masscan/parsers/binary.rb, line 494
#
# Parses a banner record with a 14 byte header (timestamp, IPv4 address,
# IP protocol, port, application protocol, TTL) followed by the banner
# payload.
#
# @param [String] buffer
#   The buffer to parse.
#
# @return [Banner, nil]
#   The parsed banner record, or `nil` if the buffer is shorter than 14
#   bytes.
#
def self.parse_banner9(buffer)
  return if buffer.length < 14

  fields = buffer.unpack('L>L>CS>S>CA*')
  raw_time, raw_ip, raw_ip_proto, port, raw_app_proto, ttl, payload = fields

  Banner.new(
    protocol:     lookup_ip_protocol(raw_ip_proto),
    port:         port,
    ttl:          ttl,
    ip:           decode_ipv4(raw_ip),
    timestamp:    decode_timestamp(raw_time),
    app_protocol: lookup_app_protocol(raw_app_proto),
    payload:      payload
  )
end
parse_status(buffer,status) click to toggle source

Parses a status record.

@param [String] buffer

The buffer to parse.

@param [:open, :closed] status

Indicates whether the port status is open or closed.

@return [Status]

The parsed status record.
# File lib/masscan/parsers/binary.rb, line 346
#
# Parses a status record with a 12 byte header: timestamp, IPv4 address,
# port, reason bitflag and TTL.
#
# This record layout has no IP protocol field, so the protocol is derived
# from a fixed list of port numbers and defaults to `:tcp`.
#
# @param [String] buffer
#   The buffer to parse.
#
# @param [:open, :closed] status
#   Indicates whether the port status is open or closed.
#
# @return [Status, nil]
#   The parsed status record, or `nil` if the buffer is shorter than 12
#   bytes.
#
def self.parse_status(buffer,status)
  if buffer.length < 12
    return
  end

  timestamp, ip, port, reason, ttl = buffer.unpack("L>L>S>CC")

  timestamp = decode_timestamp(timestamp)
  ip        = decode_ipv4(ip)
  reason    = decode_reason(reason)

  # if ARP, there will be a MAC address after the record: the 6 bytes that
  # immediately follow the 12 byte header, i.e. starting at offset 12
  mac = if ip == 0 && buffer.length >= 12+6
          buffer[12,6]
        end

  protocol = case port
             when 53, 123, 137, 161 then  :udp
             when 36422, 36412, 2905 then :sctp
             else                         :tcp
             end

  return Status.new(
    status:    status,
    protocol:  protocol,
    port:      port,
    reason:    reason,
    ttl:       ttl,
    ip:        ip,
    timestamp: timestamp,
    mac:       mac
  )
end
parse_status2(buffer,status) click to toggle source

Parses a status record.

@param [String] buffer

The buffer to parse.

@param [:open, :closed] status

Indicates whether the port status is open or closed.

@return [Status]

The parsed status record.
# File lib/masscan/parsers/binary.rb, line 458
#
# Parses a status record with a 13 byte header: timestamp, IPv4 address,
# IP protocol, port, reason bitflag and TTL.
#
# @param [String] buffer
#   The buffer to parse.
#
# @param [:open, :closed] status
#   Indicates whether the port status is open or closed.
#
# @return [Status, nil]
#   The parsed status record, or `nil` if the buffer is shorter than 13
#   bytes.
#
def self.parse_status2(buffer,status)
  return if buffer.length < 13

  fields = buffer.unpack('L>L>CS>CC')
  raw_time, raw_ip, raw_ip_proto, port, raw_reason, ttl = fields

  seen_at  = decode_timestamp(raw_time)
  address  = decode_ipv4(raw_ip)
  protocol = lookup_ip_protocol(raw_ip_proto)
  flags    = decode_reason(raw_reason)

  # a zero IP followed by at least six more bytes carries a MAC address
  # right after the 13 byte header
  has_mac = address == 0 && buffer.length >= 13+6
  mac     = has_mac ? buffer[13,6] : nil

  Status.new(
    status:    status,
    protocol:  protocol,
    port:      port,
    reason:    flags,
    ttl:       ttl,
    ip:        address,
    timestamp: seen_at,
    mac:       mac
  )
end
parse_status6(buffer,status) click to toggle source

Parses a status record.

@param [String] buffer

The buffer to parse.

@param [:open, :closed] status

Indicates whether the port status is open or closed.

@return [Status]

The parsed status record.
# File lib/masscan/parsers/binary.rb, line 528
#
# Parses an IPv6 status record: timestamp, IP protocol, port, reason
# bitflag, TTL, IP version and a 128bit address (as two 64bit halves).
#
# @param [String] buffer
#   The buffer to parse.
#
# @param [:open, :closed] status
#   Indicates whether the port status is open or closed.
#
# @return [Status]
#   The parsed status record.
#
# @raise [CorruptedFile]
#   If the record's IP version field is not 6, which includes buffers that
#   are too short to contain that field.
#
def self.parse_status6(buffer,status)
  # all-ones sentinels, in field order, used for any field that a
  # truncated buffer leaves as nil
  sentinels = [
    0xffffffff,            # timestamp
    0xff,                  # ip_proto
    0xffff,                # port
    0xff,                  # reason
    0xff,                  # ttl
    0xff,                  # ip_version
    0xffffffff_ffffffff,   # ipv6_hi
    0xffffffff_ffffffff    # ipv6_lo
  ]

  unpacked = buffer.unpack('L>CS>CCCQ>Q>')

  raw_time, raw_ip_proto, port, raw_reason, ttl, ip_version, ipv6_hi, ipv6_lo =
    sentinels.each_with_index.map { |fallback,index| unpacked[index] || fallback }

  if ip_version != 6
    raise(CorruptedFile,"expected ip_version to be 6: #{ip_version.inspect}")
  end

  Status.new(
    status:    status,
    protocol:  lookup_ip_protocol(raw_ip_proto),
    port:      port,
    reason:    decode_reason(raw_reason),
    ttl:       ttl,
    ip:        decode_ipv6(ipv6_hi,ipv6_lo),
    timestamp: decode_timestamp(raw_time)
  )
end
read_multibyte_uint(io) click to toggle source

Reads a multi-byte unsigned integer.

@param [IO] io

The IO object to read from.

@return [Integer, nil]

The unsigned integer, or `nil` if End-of-Stream was reached.
# File lib/masscan/parsers/binary.rb, line 176
#
# Reads a multi-byte unsigned integer from the stream.
#
# Each byte contributes its low seven bits, most significant group first.
# A set high bit (`0x80`) means another byte follows.
#
# @param [IO] io
#   The IO object to read from.
#
# @return [Integer, nil]
#   The unsigned integer, or `nil` if End-of-Stream was reached before the
#   value was complete.
#
def self.read_multibyte_uint(io)
  byte = io.getbyte
  return unless byte

  value = byte & 0x7f

  # keep reading while the continuation bit of the last byte is set
  until (byte & 0x80).zero?
    byte = io.getbyte
    return unless byte

    value = (value << 7) | (byte & 0x7f)
  end

  value
end
read_pseudo_record(io) click to toggle source

Reads the “pseudo record” at the beginning of the file.

@param [IO] io

The IO object to read from.

@return [String]

The read buffer.
# File lib/masscan/parsers/binary.rb, line 153
#
# Reads the "pseudo record" at the beginning of the file and checks that
# it starts with the masscan magic string.
#
# @param [IO] io
#   The IO object to read from.
#
# @return [String]
#   The read buffer, PSEUDO_RECORD_SIZE bytes long.
#
# @raise [CorruptedFile]
#   If fewer than PSEUDO_RECORD_SIZE bytes could be read (including an
#   empty stream), or the buffer does not start with MASSCAN_MAGIC.
#
def self.read_pseudo_record(io)
  buffer = io.read(PSEUDO_RECORD_SIZE)

  # IO#read(length) returns nil (not "") when the stream is already at
  # end-of-stream, so an empty input must be reported as corrupted too
  if buffer.nil? || buffer.length < PSEUDO_RECORD_SIZE
    raise(CorruptedFile,"invalid masscan binary format")
  end

  unless buffer.start_with?(MASSCAN_MAGIC)
    raise(CorruptedFile,"unknown file format (expected #{MASSCAN_MAGIC})")
  end

  return buffer
end