class PhisherPhinder::MailParser::Body::BlockClassifier

Public Class Methods

new(line_end)
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 7
# Builds a classifier bound to one line delimiter.
#
# line_end - the delimiter that classify_block uses both to split a block
#            into lines and to re-join the remaining body lines
#            (presumably "\r\n" or "\n" -- confirm against callers).
def initialize(line_end)
  @line_end = line_end
end

Public Instance Methods

classify_block(contents)
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 11
# Splits a raw body block into its leading headers and its content, and
# classifies the block from the headers.
#
# Header lines are consumed up to (and including) the first blank line;
# everything after it is returned untouched as the content. Header names are
# matched case-insensitively, and folded headers (continuation lines that
# start with a space or tab) are unfolded before classification, so a
# `charset=` parameter on the line after `Content-Type: ...;` is honoured.
#
# contents - the raw block text, with lines separated by @line_end.
#
# Returns a Hash with the keys :content_type, :character_set,
# :content_transfer_encoding and :content. The first three default to
# :text, :utf_8 and nil when no recognised header overrides them.
def classify_block(contents)
  lines = contents.split(@line_end)

  output = {
    content_type: :text,
    character_set: :utf_8,
    content_transfer_encoding: nil
  }

  headers = []

  until lines.empty?
    raw_line = lines.shift
    line = raw_line.strip

    # A blank line terminates the header section (and is itself discarded).
    break if line.empty?

    # An indented line is a continuation of the previous header, unless it
    # is itself a recognised header: those were always honoured even when
    # indented, and that behaviour is kept.
    recognised = line =~ /\AContent-(?:Type:|Transfer-Encoding)/i
    continuation = raw_line =~ /\A[ \t]/ && !recognised && !headers.empty?

    if continuation
      headers[-1] = "#{headers.last} #{line}"
    else
      headers << line
    end
  end

  # Later headers win over earlier ones, in the order they appeared.
  headers.each do |header|
    if header =~ /\AContent-Type:/i
      output.merge!(extract_content_type(header))
      output.merge!(extract_character_set(header))
    elsif header =~ /\AContent-Transfer-Encoding/i
      output.merge!(extract_encoding(header))
    end
  end

  output[:content] = lines.join(@line_end)

  output
end
classify_headers(headers)
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 39
# Classifies a block from header values that have already been parsed.
#
# headers - an object indexed with [:content_type] and
#           [:content_transfer_encoding]; either lookup may yield nil.
#
# Returns a Hash with the keys :content_type, :character_set and
# :content_transfer_encoding, starting from the defaults :text, :utf_8 and
# nil and overridden by whatever the extract_* helpers recognise.
def classify_headers(headers)
  content_type_value = headers[:content_type]

  overrides = [
    extract_content_type(content_type_value),
    extract_character_set(content_type_value),
    extract_encoding(headers[:content_transfer_encoding])
  ]

  defaults = {content_type: :text, character_set: :utf_8, content_transfer_encoding: nil}

  # Apply each fragment in turn; later fragments only touch their own key.
  overrides.reduce(defaults) { |classification, fragment| classification.merge(fragment) }
end

Private Instance Methods

extract_character_set(content_type_string)
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 71
# Extracts the character set from a Content-Type header line or value.
#
# The `charset` parameter is located anywhere in the string, not only at
# the end, so trailing parameters such as `; format=flowed` or a trailing
# `;` no longer hide it. The parameter name and its value are both compared
# case-insensitively, and surrounding double quotes are optional.
#
# content_type_string - the Content-Type text to inspect, or nil.
#
# Returns {character_set: :utf_8}, {character_set: :windows_1251} or
# {character_set: :iso_8859_1} for a recognised charset, and an empty Hash
# when the input is nil, has no charset parameter, or names a charset this
# classifier does not know.
def extract_character_set(content_type_string)
  return {} unless content_type_string

  # The value stops at the first quote, semicolon or whitespace character.
  charset_matches = content_type_string.match(/charset="?(?<charset>[^";\s]+)/i)
  return {} unless charset_matches

  case charset_matches[:charset].downcase
  when 'utf-8' then {character_set: :utf_8}
  when 'windows-1251' then {character_set: :windows_1251}
  when 'iso-8859-1' then {character_set: :iso_8859_1}
  else {}
  end
end
extract_content_type(content_type_string)
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 57
# Determines whether a Content-Type header line or value denotes plain text
# or HTML.
#
# The media type is compared case-insensitively, so `TEXT/HTML` and
# `Text/Plain` are recognised as well as the lower-case spellings.
#
# content_type_string - the Content-Type text to inspect, or nil.
#
# Returns {content_type: :text} for text/plain, {content_type: :html} for
# text/html, and an empty Hash for nil or any other media type. When both
# substrings occur, text/plain takes precedence.
def extract_content_type(content_type_string)
  return {} unless content_type_string

  normalised = content_type_string.downcase

  if normalised.include?('text/plain')
    {content_type: :text}
  elsif normalised.include?('text/html')
    {content_type: :html}
  else
    {}
  end
end
extract_encoding(encoding_string)
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 92
# Determines the transfer encoding named in a Content-Transfer-Encoding
# header line or value.
#
# The mechanism name is compared case-insensitively, so `BASE64` and
# `Quoted-Printable` are recognised as well as the lower-case spellings.
#
# encoding_string - the Content-Transfer-Encoding text to inspect, or nil.
#
# Returns {content_transfer_encoding: :base64}, 
# {content_transfer_encoding: :quoted_printable} or
# {content_transfer_encoding: :seven_bit} for a recognised mechanism, and an
# empty Hash for nil or anything else (including 8bit and binary).
def extract_encoding(encoding_string)
  return {} unless encoding_string

  mechanism = encoding_string.downcase

  if mechanism.include?('base64')
    {content_transfer_encoding: :base64}
  elsif mechanism.include?('quoted-printable')
    {content_transfer_encoding: :quoted_printable}
  elsif mechanism.include?('7bit')
    {content_transfer_encoding: :seven_bit}
  else
    {}
  end
end