class PhisherPhinder::MailParser::Body::BlockClassifier
Public Class Methods
new(line_end)
click to toggle source
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 7
#
# Stores the line terminator for later use by the instance.
#
# line_end - separator kept in @line_end; #classify_block passes it to
#            String#split and Array#join (presumably a String such as
#            "\r\n" -- confirm against callers).
def initialize(line_end)
  @line_end = line_end
end
Public Instance Methods
classify_block(contents)
click to toggle source
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 11
#
# Splits a body block into its leading header lines and the remaining
# content, and classifies the block from those headers.
#
# Header lines are consumed until the first blank (or whitespace-only) line.
# Header names are matched case-insensitively, and folded continuation lines
# (lines starting with a space or tab) are joined to the header they belong
# to, so that a `charset=` parameter placed on the line after `Content-Type:`
# is still seen.
#
# contents - the raw text of the block; it is split on @line_end.
#
# Returns a Hash with :content_type, :character_set,
# :content_transfer_encoding (defaults :text, :utf_8 and nil) and :content,
# the lines left after the header section re-joined with @line_end.
def classify_block(contents)
  lines = contents.split(@line_end)
  output = {
    content_type: :text,
    character_set: :utf_8,
    content_transfer_encoding: nil
  }

  until lines.empty?
    line = lines.shift.strip

    # A blank line separates the headers from the content.
    break if line.empty?

    # Unfold: a following line that starts with whitespace and carries text
    # continues the current header. Whitespace-only lines are left alone so
    # they still terminate the header section.
    while lines.first && lines.first =~ /\A[ \t]+\S/
      line = "#{line} #{lines.shift.strip}"
    end

    case line
    when /\AContent-Type:/i
      output.merge!(extract_content_type(line))
      output.merge!(extract_character_set(line))
    when /\AContent-Transfer-Encoding/i
      output.merge!(extract_encoding(line))
    end
  end

  output[:content] = lines.join(@line_end)
  output
end
classify_headers(headers)
click to toggle source
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 39
#
# Classifies a block from headers that have already been parsed.
#
# headers - an object indexed with :content_type and
#           :content_transfer_encoding (presumably a Hash of header
#           strings; either value may be nil).
#
# Returns a Hash with :content_type, :character_set and
# :content_transfer_encoding. Defaults are :text, :utf_8 and nil; each is
# overridden only when the matching extractor recognises a value.
def classify_headers(headers)
  content_type_header = headers[:content_type]
  transfer_encoding_header = headers[:content_transfer_encoding]

  defaults = {
    content_type: :text,
    character_set: :utf_8,
    content_transfer_encoding: nil
  }

  defaults
    .merge(extract_content_type(content_type_header))
    .merge(extract_character_set(content_type_header))
    .merge(extract_encoding(transfer_encoding_header))
end
Private Instance Methods
extract_character_set(content_type_string)
click to toggle source
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 71
#
# Pulls the charset parameter out of a Content-Type value.
#
# The parameter name is matched case-insensitively and the value ends at the
# first quote, semicolon or whitespace, so a charset followed by further
# parameters (e.g. `; format=flowed`) is still recognised.
#
# content_type_string - the Content-Type header text, or nil.
#
# Returns {character_set: :utf_8}, {character_set: :windows_1251} or
# {character_set: :iso_8859_1} for those charsets (compared
# case-insensitively), and an empty Hash for nil input, a missing charset
# parameter or any other charset.
def extract_character_set(content_type_string)
  return {} unless content_type_string

  charset_matches = content_type_string.match(/charset\s*=\s*"?(?<charset>[^";\s]+)/i)
  return {} unless charset_matches

  case charset_matches[:charset].downcase
  when 'utf-8' then { character_set: :utf_8 }
  when 'windows-1251' then { character_set: :windows_1251 }
  when 'iso-8859-1' then { character_set: :iso_8859_1 }
  else {}
  end
end
extract_content_type(content_type_string)
click to toggle source
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 57
#
# Determines whether a Content-Type value describes plain text or HTML.
#
# The comparison is case-insensitive, so `TEXT/HTML` and `Text/Plain` are
# recognised as well as the lowercase spellings.
#
# content_type_string - the Content-Type header text, or nil.
#
# Returns {content_type: :text} when the value contains text/plain,
# {content_type: :html} when it contains text/html, and an empty Hash for
# nil input or any other value. text/plain is checked first.
def extract_content_type(content_type_string)
  return {} unless content_type_string

  normalized = content_type_string.downcase

  if normalized.include?('text/plain')
    { content_type: :text }
  elsif normalized.include?('text/html')
    { content_type: :html }
  else
    {}
  end
end
extract_encoding(encoding_string)
click to toggle source
# File lib/phisher_phinder/mail_parser/body/block_classifier.rb, line 92
#
# Maps a Content-Transfer-Encoding value to a symbol.
#
# The comparison is case-insensitive, so `BASE64` and `Quoted-Printable` are
# recognised as well as the lowercase spellings.
#
# encoding_string - the Content-Transfer-Encoding header text, or nil.
#
# Returns {content_transfer_encoding: :base64}, :quoted_printable or
# :seven_bit when the value contains base64, quoted-printable or 7bit
# (checked in that order), and an empty Hash for nil input or any other
# value.
def extract_encoding(encoding_string)
  return {} unless encoding_string

  normalized = encoding_string.downcase

  if normalized.include?('base64')
    { content_transfer_encoding: :base64 }
  elsif normalized.include?('quoted-printable')
    { content_transfer_encoding: :quoted_printable }
  elsif normalized.include?('7bit')
    { content_transfer_encoding: :seven_bit }
  else
    {}
  end
end