module RIMS::RFC822::Parse

Constants

Address

Public Class Methods

parse_content_disposition(disposition_txt) click to toggle source
# File lib/rims/rfc822.rb, line 171
# Splits a content-disposition text into its leading value and its
# parameters.
#
# This is a thin alias: the argument is handed to split_parameters and
# that method's result is returned unchanged.
#
# disposition_txt:: text passed straight through to split_parameters.
def parse_content_disposition(disposition_txt)
  split_parameters(disposition_txt)
end
parse_content_language(language_tags_txt) click to toggle source
# File lib/rims/rfc822.rb, line 176
# Breaks a comma-separated list of language tags into individual tags.
#
# Whitespace around each tag is removed, and entries that are empty after
# stripping are discarded.
#
# language_tags_txt:: comma-separated tag text, e.g. "en, fr".
#
# Returns a frozen Array of frozen Strings.
def parse_content_language(language_tags_txt)
  language_tags_txt
    .split(',')
    .map { |tag| tag.strip.freeze }
    .reject(&:empty?)
    .freeze
end
parse_content_type(type_txt) click to toggle source
# File lib/rims/rfc822.rb, line 138
# Breaks a content-type text into main type, subtype and parameters.
#
# type_txt:: text of the form "main/sub; name=value; ...".
#
# Returns a frozen three-element Array `[main_type, sub_type, params]`.
# `params` is always the parameter table produced by split_parameters for
# type_txt. When the main type or the subtype is missing or blank, the two
# type strings fall back to "text" and "plain" (tagged with the encoding of
# type_txt).
def parse_content_type(type_txt)
  media_type_txt, params = split_parameters(type_txt)
  main_type, sub_type = media_type_txt.split('/', 2) if media_type_txt

  if main_type && sub_type
    main_type.strip!
    sub_type.strip!
    unless main_type.empty? || sub_type.empty?
      return [ main_type.freeze, sub_type.freeze, params ].freeze
    end
  end

  # See RFC2045 / 5.2. Content-Type Defaults
  # <https://tools.ietf.org/html/rfc2045#section-5.2>
  #
  #     Default RFC 822 messages without a MIME Content-Type header are taken
  #     by this protocol to be plain text in the US-ASCII character set,
  #     which can be explicitly specified as:
  #
  #       Content-type: text/plain; charset=us-ascii
  #
  fallback_encoding = type_txt.encoding
  default_type = %w[ text plain ].map { |name|
    name.dup.force_encoding(fallback_encoding).freeze
  }

  # default is no charset, it will be `ASCII-8BIT'.
  [ *default_type, params ].freeze
end
parse_header(header_txt) click to toggle source
# File lib/rims/rfc822.rb, line 22
# Parses header text into a list of field name / field value pairs.
#
# A field has the form `name: value`. Lines that begin with whitespace are
# treated as continuations of the preceding value and stay inside it
# (including their line breaks); only the outer whitespace of the complete
# value is stripped.
#
# Only spaces and tabs are skipped directly after the colon. Line breaks
# are deliberately not skipped there: otherwise a field with an empty
# value (e.g. "Subject:\r\nTo: x") would take the following header line as
# its value and that header would be lost. A value that starts on a folded
# continuation line is still picked up by the continuation clause.
#
# header_txt:: header text to scan.
#
# Returns a frozen Array of `[name, value]` pairs in order of appearance.
# Both strings of every pair are frozen.
def parse_header(header_txt)
  field_pair_list = header_txt.scan(%r{
    ((?#name) \S+? )
    \s* : [\x20\t]*
    (
       (?#value)
       .*? (?: \R|\z)
       (?: \s .*? (?: \R|\z) )*
    )
  }x)

  field_pair_list.each do |name, value|
    value.strip!
    name.freeze
    value.freeze
  end

  field_pair_list.freeze
end
parse_mail_address_list(address_list_txt) click to toggle source
# File lib/rims/rfc822.rb, line 218
# Parses an address-list text into a flat list of Address values.
#
# The text is consumed from the front. At each step three forms are tried
# in this order, each optionally followed by a comma:
#
# 1. group:         `display-name : member-list ;`
# 2. bare address:  `local-part @ domain`
# 3. named address: `display-name < [route :] local-part @ domain >`
#
# Parsing stops at the first position where none of the forms matches;
# whatever text remains is ignored without error. Note that form 3
# requires at least one non-space character before `<`.
#
# Address.new is called positionally; judging from the named-address
# branch the order is (display name, route, local part, domain).
# A group is emitted as a start marker carrying the group name in the
# third position, followed by the group's members (parsed recursively),
# followed by an end marker whose four values are all nil.
# NOTE(review): this marker layout looks like the IMAP envelope group
# convention -- confirm against the callers.
#
# address_list_txt:: address list text; it is duplicated before parsing,
#                    so the caller's string is not modified.
#
# Returns a frozen Array of frozen Address values.
def parse_mail_address_list(address_list_txt)
  addr_list = []
  src_txt = address_list_txt.dup

  while (true)
    # Each successful sub! removes the matched prefix from src_txt and
    # leaves its captures in $~ for the branch body.
    if (src_txt.sub!(%r{
          \A
          \s*
          (?<display_name> \S.*? ) \s* : (?<group_list> .*? ) ;
          \s*
          ,?
        }x, ''))
    then
      # group: start marker, members, end marker.
      display_name = $~[:display_name]
      group_list = $~[:group_list]
      addr_list << Address.new(nil, nil, unquote_phrase(display_name), nil).freeze
      addr_list.concat(parse_mail_address_list(group_list))
      addr_list << Address.new(nil, nil, nil, nil).freeze
    elsif (src_txt.sub!(%r{
             \A
             \s*
             (?<local_part> [^<>@",\s]+ )
             \s* @ \s*
             (?<domain> [^<>@",\s]+ )
             \s*
             ,?
           }x, ''))
    then
      # bare address: no display name and no route.
      addr_list << Address.new(nil, nil, $~[:local_part].freeze, $~[:domain].freeze).freeze
    elsif (src_txt.sub!(%r{
             \A
             \s*
             (?<display_name> \S.*? )
             \s*
             <
               \s*
               (?:
                 (?<route>
                   @[^<>@",]*
                   (?:
                     , \s*
                     @[^<>@",]*
                   )*
                 )
                 \s*
                 :
               )?
               \s*
               (?<local_part> [^<>@",\s]+ )
               \s* @ \s*
               (?<domain> [^<>@",\s]+ )
               \s*
             >
             \s*
             ,?
           }x, ''))
    then
      # named address; route is nil when the optional route part is absent.
      display_name = $~[:display_name]
      route = $~[:route]
      local_part = $~[:local_part]
      domain = $~[:domain]
      addr_list << Address.new(unquote_phrase(display_name), route.freeze, local_part.freeze, domain.freeze).freeze
    else
      # nothing recognizable at the front of the remaining text.
      break
    end
  end

  addr_list.freeze
end
parse_multipart_body(boundary, body_txt) click to toggle source
# File lib/rims/rfc822.rb, line 188
# Cuts a multipart body into its parts.
#
# Everything from the first close delimiter (`--boundary--`) onwards is
# discarded, as is everything before the first delimiter (`--boundary`).
# The text in between is split on the delimiter. Each part has its leading
# whitespace removed and one trailing line break dropped.
#
# boundary:: boundary string without the leading dashes.
# body_txt:: multipart body text.
#
# Returns a frozen Array of frozen part Strings. The Array is empty when
# the body contains no delimiter.
def parse_multipart_body(boundary, body_txt)
  delimiter = '--' + boundary
  close_delimiter = delimiter + '--'

  before_close, _epilogue = body_txt.split(close_delimiter, 2)
  return [].freeze unless before_close

  _preamble, parts_txt = before_close.split(delimiter, 2)
  return [].freeze unless parts_txt

  parts = parts_txt.split(delimiter, -1)
  parts.each do |part|
    part.lstrip!
    part.chomp!("\n")
    part.chomp!("\r")
    part.freeze
  end
  parts.freeze
end
parse_parameters(parameters_txt) click to toggle source
# File lib/rims/rfc822.rb, line 95
# Collects `name=value` parameters from a parameter text.
#
# A value is either a double-quoted string, which is passed through
# unquote_phrase, or a bare token. Each parameter must be followed by `;`
# or by the end of the text.
#
# parameters_txt:: text such as ` charset=utf-8; name="a b"`.
#
# Returns a frozen Hash keyed by the downcased parameter name. Each entry
# is a frozen pair `[name_as_written, value]`; when a name occurs more than
# once the last occurrence wins.
def parse_parameters(parameters_txt)
  parameter_pattern = %r{
    (?<name> \S+? )
    \s* = \s*
    (?:
      (?<quoted_string> ".*?" ) |
      (?<token> \S+? )
    )
    \s*
    (?: ; | \Z )
  }x

  table = {}
  parameters_txt.scan(parameter_pattern) do
    found = Regexp.last_match
    name = found[:name]
    quoted = found[:quoted_string]
    value = quoted ? unquote_phrase(quoted) : found[:token]
    table[name.downcase.freeze] = [ name.freeze, value.freeze ].freeze
  end

  table.freeze
end
split_message(msg_txt) click to toggle source
# File lib/rims/rfc822.rb, line 9
# Divides message text into a header part and a body part.
#
# Leading whitespace is removed first. The text is then cut at the first
# pair of consecutive line breaks; that separator is kept at the end of the
# header part. When no such separator exists, the whole text is returned as
# the body and the header is nil.
#
# msg_txt:: complete message text.
#
# Returns a frozen pair `[header_txt, body_txt]`; strings in it are frozen.
def split_message(msg_txt)
  head, rest = msg_txt.lstrip.split(/\R\R/, 2)
  separator = Regexp.last_match(0)

  if separator
    head << separator
    return [ head.freeze, rest.freeze ].freeze
  end

  [ nil, head.freeze ].freeze
end
split_parameters(type_params_txt) click to toggle source
# File lib/rims/rfc822.rb, line 121
# Separates the leading value of a `value; name=value; ...` text from its
# parameters.
#
# The text is cut at the first `;`. The part before it is stripped; the
# part after it, when present, is handed to parse_parameters.
#
# type_params_txt:: text to split.
#
# Returns a frozen pair `[value, params]`. `value` is nil when the text
# yields no leading part (e.g. an empty string). `params` is a frozen Hash,
# empty when there is no `;` in the text.
def split_parameters(type_params_txt)
  head, params_txt = type_params_txt.split(';', 2)
  return [ nil, {}.freeze ].freeze unless head

  head.strip!
  params = params_txt ? parse_parameters(params_txt) : {}.freeze
  [ head.freeze, params ].freeze
end
unquote_phrase(phrase_txt) click to toggle source
# File lib/rims/rfc822.rb, line 43
# Removes quoting, escaping and comments from a phrase.
#
# The phrase is read token by token with a three-mode scanner:
#
# raw::     outside quotes and comments. `"` opens a quoted string, `(`
#           opens a comment, everything else is copied.
# quote::   inside `"..."`. `"` closes the string, everything else
#           (parentheses included) is copied.
# comment:: inside `(...)`. `)` closes the comment, everything else is
#           dropped. Comments do not nest.
#
# In every mode a backslash consumes the following character: it is copied
# literally in raw and quote mode and dropped in comment mode. A trailing
# backslash with nothing after it is ignored.
#
# phrase_txt:: text to unquote; it is not modified.
#
# Returns a new frozen String built with the encoding of phrase_txt.
def unquote_phrase(phrase_txt)
  mode = :raw
  rest = phrase_txt.dup
  out = ''.encode(phrase_txt.encoding)

  while rest.sub!(/\A (?: " | \( | \) | \\ | [^"\(\)\\]+ )/x, '')
    token = $&
    case mode
    when :raw, :quote
      if token == "\\"
        # slice! yields nil when nothing follows the backslash.
        escaped = rest.slice!(0)
        out << escaped if escaped
      elsif token == '"'
        mode = (mode == :raw) ? :quote : :raw
      elsif token == '(' && mode == :raw
        mode = :comment
      else
        out << token
      end
    when :comment
      if token == ')'
        mode = :raw
      elsif token == "\\"
        rest.slice!(0)
      end
      # any other token is comment text and is ignored.
    else
      raise "internal error - unknown state: #{mode}"
    end
  end

  out.freeze
end

Private Instance Methods

parse_content_disposition(disposition_txt) click to toggle source
# File lib/rims/rfc822.rb, line 171
# Splits a content-disposition text into its leading value and its
# parameters.
#
# This is a thin alias: the argument is handed to split_parameters and
# that method's result is returned unchanged.
#
# disposition_txt:: text passed straight through to split_parameters.
def parse_content_disposition(disposition_txt)
  split_parameters(disposition_txt)
end
parse_content_language(language_tags_txt) click to toggle source
# File lib/rims/rfc822.rb, line 176
# Breaks a comma-separated list of language tags into individual tags.
#
# Whitespace around each tag is removed, and entries that are empty after
# stripping are discarded.
#
# language_tags_txt:: comma-separated tag text, e.g. "en, fr".
#
# Returns a frozen Array of frozen Strings.
def parse_content_language(language_tags_txt)
  language_tags_txt
    .split(',')
    .map { |tag| tag.strip.freeze }
    .reject(&:empty?)
    .freeze
end
parse_content_type(type_txt) click to toggle source
# File lib/rims/rfc822.rb, line 138
# Breaks a content-type text into main type, subtype and parameters.
#
# type_txt:: text of the form "main/sub; name=value; ...".
#
# Returns a frozen three-element Array `[main_type, sub_type, params]`.
# `params` is always the parameter table produced by split_parameters for
# type_txt. When the main type or the subtype is missing or blank, the two
# type strings fall back to "text" and "plain" (tagged with the encoding of
# type_txt).
def parse_content_type(type_txt)
  media_type_txt, params = split_parameters(type_txt)
  main_type, sub_type = media_type_txt.split('/', 2) if media_type_txt

  if main_type && sub_type
    main_type.strip!
    sub_type.strip!
    unless main_type.empty? || sub_type.empty?
      return [ main_type.freeze, sub_type.freeze, params ].freeze
    end
  end

  # See RFC2045 / 5.2. Content-Type Defaults
  # <https://tools.ietf.org/html/rfc2045#section-5.2>
  #
  #     Default RFC 822 messages without a MIME Content-Type header are taken
  #     by this protocol to be plain text in the US-ASCII character set,
  #     which can be explicitly specified as:
  #
  #       Content-type: text/plain; charset=us-ascii
  #
  fallback_encoding = type_txt.encoding
  default_type = %w[ text plain ].map { |name|
    name.dup.force_encoding(fallback_encoding).freeze
  }

  # default is no charset, it will be `ASCII-8BIT'.
  [ *default_type, params ].freeze
end
parse_header(header_txt) click to toggle source
# File lib/rims/rfc822.rb, line 22
# Parses header text into a list of field name / field value pairs.
#
# A field has the form `name: value`. Lines that begin with whitespace are
# treated as continuations of the preceding value and stay inside it
# (including their line breaks); only the outer whitespace of the complete
# value is stripped.
#
# Only spaces and tabs are skipped directly after the colon. Line breaks
# are deliberately not skipped there: otherwise a field with an empty
# value (e.g. "Subject:\r\nTo: x") would take the following header line as
# its value and that header would be lost. A value that starts on a folded
# continuation line is still picked up by the continuation clause.
#
# header_txt:: header text to scan.
#
# Returns a frozen Array of `[name, value]` pairs in order of appearance.
# Both strings of every pair are frozen.
def parse_header(header_txt)
  field_pair_list = header_txt.scan(%r{
    ((?#name) \S+? )
    \s* : [\x20\t]*
    (
       (?#value)
       .*? (?: \R|\z)
       (?: \s .*? (?: \R|\z) )*
    )
  }x)

  field_pair_list.each do |name, value|
    value.strip!
    name.freeze
    value.freeze
  end

  field_pair_list.freeze
end
parse_mail_address_list(address_list_txt) click to toggle source
# File lib/rims/rfc822.rb, line 218
# Parses an address-list text into a flat list of Address values.
#
# The text is consumed from the front. At each step three forms are tried
# in this order, each optionally followed by a comma:
#
# 1. group:         `display-name : member-list ;`
# 2. bare address:  `local-part @ domain`
# 3. named address: `display-name < [route :] local-part @ domain >`
#
# Parsing stops at the first position where none of the forms matches;
# whatever text remains is ignored without error. Note that form 3
# requires at least one non-space character before `<`.
#
# Address.new is called positionally; judging from the named-address
# branch the order is (display name, route, local part, domain).
# A group is emitted as a start marker carrying the group name in the
# third position, followed by the group's members (parsed recursively),
# followed by an end marker whose four values are all nil.
# NOTE(review): this marker layout looks like the IMAP envelope group
# convention -- confirm against the callers.
#
# address_list_txt:: address list text; it is duplicated before parsing,
#                    so the caller's string is not modified.
#
# Returns a frozen Array of frozen Address values.
def parse_mail_address_list(address_list_txt)
  addr_list = []
  src_txt = address_list_txt.dup

  while (true)
    # Each successful sub! removes the matched prefix from src_txt and
    # leaves its captures in $~ for the branch body.
    if (src_txt.sub!(%r{
          \A
          \s*
          (?<display_name> \S.*? ) \s* : (?<group_list> .*? ) ;
          \s*
          ,?
        }x, ''))
    then
      # group: start marker, members, end marker.
      display_name = $~[:display_name]
      group_list = $~[:group_list]
      addr_list << Address.new(nil, nil, unquote_phrase(display_name), nil).freeze
      addr_list.concat(parse_mail_address_list(group_list))
      addr_list << Address.new(nil, nil, nil, nil).freeze
    elsif (src_txt.sub!(%r{
             \A
             \s*
             (?<local_part> [^<>@",\s]+ )
             \s* @ \s*
             (?<domain> [^<>@",\s]+ )
             \s*
             ,?
           }x, ''))
    then
      # bare address: no display name and no route.
      addr_list << Address.new(nil, nil, $~[:local_part].freeze, $~[:domain].freeze).freeze
    elsif (src_txt.sub!(%r{
             \A
             \s*
             (?<display_name> \S.*? )
             \s*
             <
               \s*
               (?:
                 (?<route>
                   @[^<>@",]*
                   (?:
                     , \s*
                     @[^<>@",]*
                   )*
                 )
                 \s*
                 :
               )?
               \s*
               (?<local_part> [^<>@",\s]+ )
               \s* @ \s*
               (?<domain> [^<>@",\s]+ )
               \s*
             >
             \s*
             ,?
           }x, ''))
    then
      # named address; route is nil when the optional route part is absent.
      display_name = $~[:display_name]
      route = $~[:route]
      local_part = $~[:local_part]
      domain = $~[:domain]
      addr_list << Address.new(unquote_phrase(display_name), route.freeze, local_part.freeze, domain.freeze).freeze
    else
      # nothing recognizable at the front of the remaining text.
      break
    end
  end

  addr_list.freeze
end
parse_multipart_body(boundary, body_txt) click to toggle source
# File lib/rims/rfc822.rb, line 188
# Cuts a multipart body into its parts.
#
# Everything from the first close delimiter (`--boundary--`) onwards is
# discarded, as is everything before the first delimiter (`--boundary`).
# The text in between is split on the delimiter. Each part has its leading
# whitespace removed and one trailing line break dropped.
#
# boundary:: boundary string without the leading dashes.
# body_txt:: multipart body text.
#
# Returns a frozen Array of frozen part Strings. The Array is empty when
# the body contains no delimiter.
def parse_multipart_body(boundary, body_txt)
  delimiter = '--' + boundary
  close_delimiter = delimiter + '--'

  before_close, _epilogue = body_txt.split(close_delimiter, 2)
  return [].freeze unless before_close

  _preamble, parts_txt = before_close.split(delimiter, 2)
  return [].freeze unless parts_txt

  parts = parts_txt.split(delimiter, -1)
  parts.each do |part|
    part.lstrip!
    part.chomp!("\n")
    part.chomp!("\r")
    part.freeze
  end
  parts.freeze
end
parse_parameters(parameters_txt) click to toggle source
# File lib/rims/rfc822.rb, line 95
# Collects `name=value` parameters from a parameter text.
#
# A value is either a double-quoted string, which is passed through
# unquote_phrase, or a bare token. Each parameter must be followed by `;`
# or by the end of the text.
#
# parameters_txt:: text such as ` charset=utf-8; name="a b"`.
#
# Returns a frozen Hash keyed by the downcased parameter name. Each entry
# is a frozen pair `[name_as_written, value]`; when a name occurs more than
# once the last occurrence wins.
def parse_parameters(parameters_txt)
  parameter_pattern = %r{
    (?<name> \S+? )
    \s* = \s*
    (?:
      (?<quoted_string> ".*?" ) |
      (?<token> \S+? )
    )
    \s*
    (?: ; | \Z )
  }x

  table = {}
  parameters_txt.scan(parameter_pattern) do
    found = Regexp.last_match
    name = found[:name]
    quoted = found[:quoted_string]
    value = quoted ? unquote_phrase(quoted) : found[:token]
    table[name.downcase.freeze] = [ name.freeze, value.freeze ].freeze
  end

  table.freeze
end
split_message(msg_txt) click to toggle source
# File lib/rims/rfc822.rb, line 9
# Divides message text into a header part and a body part.
#
# Leading whitespace is removed first. The text is then cut at the first
# pair of consecutive line breaks; that separator is kept at the end of the
# header part. When no such separator exists, the whole text is returned as
# the body and the header is nil.
#
# msg_txt:: complete message text.
#
# Returns a frozen pair `[header_txt, body_txt]`; strings in it are frozen.
def split_message(msg_txt)
  head, rest = msg_txt.lstrip.split(/\R\R/, 2)
  separator = Regexp.last_match(0)

  if separator
    head << separator
    return [ head.freeze, rest.freeze ].freeze
  end

  [ nil, head.freeze ].freeze
end
split_parameters(type_params_txt) click to toggle source
# File lib/rims/rfc822.rb, line 121
# Separates the leading value of a `value; name=value; ...` text from its
# parameters.
#
# The text is cut at the first `;`. The part before it is stripped; the
# part after it, when present, is handed to parse_parameters.
#
# type_params_txt:: text to split.
#
# Returns a frozen pair `[value, params]`. `value` is nil when the text
# yields no leading part (e.g. an empty string). `params` is a frozen Hash,
# empty when there is no `;` in the text.
def split_parameters(type_params_txt)
  head, params_txt = type_params_txt.split(';', 2)
  return [ nil, {}.freeze ].freeze unless head

  head.strip!
  params = params_txt ? parse_parameters(params_txt) : {}.freeze
  [ head.freeze, params ].freeze
end
unquote_phrase(phrase_txt) click to toggle source
# File lib/rims/rfc822.rb, line 43
# Removes quoting, escaping and comments from a phrase.
#
# The phrase is read token by token with a three-mode scanner:
#
# raw::     outside quotes and comments. `"` opens a quoted string, `(`
#           opens a comment, everything else is copied.
# quote::   inside `"..."`. `"` closes the string, everything else
#           (parentheses included) is copied.
# comment:: inside `(...)`. `)` closes the comment, everything else is
#           dropped. Comments do not nest.
#
# In every mode a backslash consumes the following character: it is copied
# literally in raw and quote mode and dropped in comment mode. A trailing
# backslash with nothing after it is ignored.
#
# phrase_txt:: text to unquote; it is not modified.
#
# Returns a new frozen String built with the encoding of phrase_txt.
def unquote_phrase(phrase_txt)
  mode = :raw
  rest = phrase_txt.dup
  out = ''.encode(phrase_txt.encoding)

  while rest.sub!(/\A (?: " | \( | \) | \\ | [^"\(\)\\]+ )/x, '')
    token = $&
    case mode
    when :raw, :quote
      if token == "\\"
        # slice! yields nil when nothing follows the backslash.
        escaped = rest.slice!(0)
        out << escaped if escaped
      elsif token == '"'
        mode = (mode == :raw) ? :quote : :raw
      elsif token == '(' && mode == :raw
        mode = :comment
      else
        out << token
      end
    when :comment
      if token == ')'
        mode = :raw
      elsif token == "\\"
        rest.slice!(0)
      end
      # any other token is comment text and is ignored.
    else
      raise "internal error - unknown state: #{mode}"
    end
  end

  out.freeze
end