class Sisimai::Data

Sisimai::Data generates parsed data from a Sisimai::Message object.

Constants

RFC822Head
RetryIndex

Public Class Methods

make(data: nil, **argvs) click to toggle source

Another constructor of Sisimai::Data. @param [Sisimai::Message] data Data object @param [Hash] argvs Parser options @option argvs [Boolean] delivered true: Include the “delivered” reason @return [Array, Nil] List of Sisimai::Data, or nil if the argument is not a Sisimai::Message object
# File lib/sisimai/data.rb, line 92
# Build Sisimai::Data objects from a parsed bounce message, one object per
# entry in the delivery status list ("ds") of the message.
#
# @param  [Sisimai::Message] data  Parsed bounce message
# @param  [Hash]             argvs Parser options
# @option argvs [Boolean] :delivered true: also keep entries whose status code begins with "2."
# @option argvs [Object]  :origin    Stored as "origin" of every object (path to the original email)
# @return [Array, nil] List of Sisimai::Data, or nil if "data" is not a
#                      Sisimai::Message object or it has no "ds"/"rfc822" value
def self.make(data: nil, **argvs)
  return nil unless data
  return nil unless data.is_a? Sisimai::Message

  messageobj = data
  rfc822data = messageobj.rfc822
  objectlist = []
  delivered1 = argvs[:delivered] || false

  return nil unless messageobj.ds
  return nil unless rfc822data

  eachobject = messageobj.ds.dup
  while e = eachobject.shift do
    # Create parameters for new() constructor. Every value copied from "e"
    # falls back to an empty string, so only "catch" can be nil here.
    piece = {
      'catch'          => messageobj.catch  || nil,
      'lhost'          => e['lhost']        || '',
      'rhost'          => e['rhost']        || '',
      'alias'          => e['alias']        || '',
      'action'         => e['action']       || '',
      'reason'         => e['reason']       || '',
      'replycode'      => e['replycode']    || '',
      'smtpagent'      => e['agent']        || '',
      'recipient'      => e['recipient']    || '',
      'softbounce'     => e['softbounce']   || '',
      'smtpcommand'    => e['command']      || '',
      'feedbacktype'   => e['feedbacktype'] || '',
      'diagnosticcode' => e['diagnosis']    || '',
      'diagnostictype' => e['spec']         || '',
      'deliverystatus' => e['status']       || '',
    }
    unless delivered1
      # Skip if the value of "deliverystatus" begins with "2." such as 2.1.5
      next if piece['deliverystatus'].start_with?('2.')
    end

    # EMAIL_ADDRESS:
    # Detect email address from message/rfc822 part
    RFC822Head[:addresser].each do |f|
      # Check each header in message/rfc822 part
      next unless rfc822data[f]
      next if rfc822data[f].empty?

      j = Sisimai::Address.find(rfc822data[f]) || []
      next if j.empty?
      piece['addresser'] = j[0]
      break
    end

    unless piece['addresser']
      # Fallback: Get the sender address from the header of the bounced
      # email if the address is not set at loop above.
      j = Sisimai::Address.find(messageobj.header['to']) || []
      piece['addresser'] = j[0] unless j.empty?
    end
    next unless piece['addresser']
    # "recipient" is never nil (see the defaults above), check for emptiness
    next if piece['recipient'].empty?

    # TIMESTAMP:
    # Convert from a time stamp or a date string to a machine time.
    datestring = nil
    zoneoffset = 0
    datevalues = []
    datevalues << e['date'] unless e['date'].to_s.empty?

    # Date information did not exist in message/delivery-status part,...
    RFC822Head[:date].each do |f|
      # Get the value of Date header or other date related header.
      next unless rfc822data[f]
      datevalues << rfc822data[f]
    end

    # Set "date" getting from the value of "Date" in the bounce message
    datevalues << messageobj.header['date'] if datevalues.size < 2

    while v = datevalues.shift do
      # Parse each date value in the array, the first parsable one wins
      datestring = Sisimai::DateTime.parse(v)
      break if datestring
    end

    if datestring && cv = datestring.match(/\A(.+)[ ]+([-+]\d{4})\z/)
      # Get the value of timezone offset from datestring
      # Wed, 26 Feb 2014 06:05:48 -0500
      datestring = cv[1]
      zoneoffset = Sisimai::DateTime.tz2second(cv[2])
      piece['timezoneoffset'] = cv[2]
    end

    begin
      # Convert from the date string to an object then calculate time
      # zone offset.
      t = Sisimai::Time.strptime(datestring, '%a, %d %b %Y %T')
      piece['timestamp'] = (t.to_time.to_i - zoneoffset) || nil
    rescue
      warn " ***warning: Failed to strptime #{datestring}"
    end
    next unless piece['timestamp']

    # OTHER_TEXT_HEADERS:
    recvheader = data.header['received'] || []
    unless recvheader.empty?
      # Get localhost and remote host name from Received header when the
      # delivery status entry did not provide them. The values are stored in
      # "piece" because that is the hash handed to the constructor below.
      piece['lhost'] = (Sisimai::RFC5322.received(recvheader[0]).shift || '') if piece['lhost'].empty?
      piece['rhost'] = (Sisimai::RFC5322.received(recvheader[-1]).pop  || '') if piece['rhost'].empty?
    end

    # Remove square brackets and curly brackets from the host variable
    %w[rhost lhost].each do |h|
      piece[h].delete!('[]()')    # Remove square brackets and curly brackets from the host variable
      piece[h].sub!(/\A.+=/, '')  # Remove string before "="
      piece[h].chomp!("\r") if piece[h].end_with?("\r") # Remove CR at the end of the value

      # Check space character in each value and get the first element; a
      # value made of spaces only has no first element and becomes empty.
      piece[h] = (piece[h].split(' ', 2).shift || '') if piece[h].include?(' ')
      piece[h].chomp!('.') if piece[h].end_with?('.')   # Remove "." at the end of the value
    end

    # Subject: header of the original message
    piece['subject'] = rfc822data['subject'] || ''
    piece['subject'].scrub!('?')
    piece['subject'].chomp!("\r") if piece['subject'].end_with?("\r")

    # The value of "List-Id" header
    piece['listid'] = rfc822data['list-id'] || ''
    unless piece['listid'].empty?
      # Get the value of List-Id header like "List name <list-id@example.org>"
      if cv = piece['listid'].match(/\A.*([<].+[>]).*\z/) then piece['listid'] = cv[1] end
      piece['listid'].delete!('<>')
      piece['listid'].chomp!("\r") if piece['listid'].end_with?("\r")
      piece['listid'] = '' if piece['listid'].include?(' ')
    end

    # The value of "Message-Id" header
    piece['messageid'] = rfc822data['message-id'] || ''
    unless piece['messageid'].empty?
      # Leave only string inside of angle brackets(<>)
      if cv = piece['messageid'].match(/\A([^ ]+)[ ].*/) then piece['messageid'] = cv[1] end
      if cv = piece['messageid'].match(/[<]([^ ]+?)[>]/) then piece['messageid'] = cv[1] end
    end

    # CHECK_DELIVERY_STATUS_VALUE:
    # Cleanup the value of "Diagnostic-Code:" header
    unless piece['diagnosticcode'].empty?
      # Count the number of D.S.N. and SMTP Reply Code
      vs = Sisimai::SMTP::Status.find(piece['diagnosticcode'])
      vr = Sisimai::SMTP::Reply.find(piece['diagnosticcode'])
      vm = 0

      if vs
        # How many times does the D.S.N. appeared. The code is escaped so
        # that each "." in it matches a literal dot only.
        vm += piece['diagnosticcode'].scan(/\b#{Regexp.escape(vs.to_s)}\b/).size
        piece['deliverystatus'] = vs if vs =~ /\A[45][.][1-9][.][1-9]\z/
      end

      if vr
        # How many times does the SMTP reply code appeared
        vm += piece['diagnosticcode'].scan(/\b#{Regexp.escape(vr.to_s)}\b/).size
        piece['replycode'] = vr if piece['replycode'].empty?
      end

      if vm > 2
        # Build regular expression for removing string like '550-5.1.1'
        # from the value of "diagnosticcode"
        re = %r/[ ]#{Regexp.escape(vr.to_s)}[- ](?:#{Regexp.escape(vs.to_s)})?/

        # 550-5.7.1 [192.0.2.222] Our system has detected that this message is
        # 550-5.7.1 likely unsolicited mail. To reduce the amount of spam sent to Gmail,
        # 550-5.7.1 this message has been blocked. Please visit
        # 550 5.7.1 https://support.google.com/mail/answer/188131 for more information.
        piece['diagnosticcode'] = Sisimai::String.sweep(piece['diagnosticcode'].gsub(re, ' '))
      end
    end

    # Default of "diagnostictype": the value is a String (possibly empty),
    # never nil, so emptiness is what tells whether it has been set.
    if piece['diagnostictype'].empty?
      if piece['reason'] == 'mailererror'
        piece['diagnostictype'] = 'X-UNIX'
      elsif !%w[feedback vacation].include?(piece['reason'])
        piece['diagnostictype'] = 'SMTP'
      end
    end

    # Check the value of SMTP command
    piece['smtpcommand'] = '' unless %w[EHLO HELO MAIL RCPT DATA QUIT].include?(piece['smtpcommand'])
    piece['origin'] = argvs[:origin]  # Set the path to the original email

    if piece['action'].empty?
      # Check the value of "action"
      if piece['reason'] == 'expired'
        # Action: delayed
        piece['action'] = 'delayed'
      elsif piece['deliverystatus'].start_with?('5', '4')
        # Action: failed
        piece['action'] = 'failed'
      end
    end

    o = Sisimai::Data.new(piece)
    # The constructor leaves "recipient" unset when an address is not valid
    next unless o.recipient

    if o.reason.empty? || RetryIndex[o.reason]
      # Decide the reason of email bounce
      r = ''; r = Sisimai::Rhost.get(o) if Sisimai::Rhost.match(o.rhost)
      if r.empty?
        # Failed to detect a bounce reason by the value of "rhost"
        r = Sisimai::Rhost.get(o, o.destination) if Sisimai::Rhost.match(o.destination)
        r = Sisimai::Reason.get(o) if r.empty?
        r = 'undefined' if r.empty?
      end
      o.reason = r
    end

    if %w[delivered feedback vacation].include?(o.reason)
      # The value of reason is "delivered", "vacation" or "feedback"
      o.softbounce = -1
      o.replycode = '' unless o.reason == 'delivered'
    else
      # Bounce message which reason is "feedback" or "vacation" does
      # not have the value of "deliverystatus".
      softorhard = nil

      if o.softbounce.to_s.empty?
        # The value is not set yet
        textasargv   = (piece['deliverystatus'] + ' ' + piece['diagnosticcode']).lstrip
        softorhard   = Sisimai::SMTP::Error.soft_or_hard(o.reason, textasargv) || ''
        o.softbounce = if softorhard.size > 0
                         # Returned value is "soft" or "hard"
                         (softorhard == 'soft') ? 1 : 0
                       else
                         # Returned value is an empty string
                         -1
                       end
      end

      if o.deliverystatus.empty?
        # Set pseudo status code
        textasargv = (o.replycode + ' ' + piece['diagnosticcode']).lstrip
        getchecked = Sisimai::SMTP::Error.is_permanent(textasargv)
        # Temporary failure only when the check explicitly answered "false"
        tmpfailure = getchecked.nil? ? false : !getchecked

        if pseudocode = Sisimai::SMTP::Status.code(o.reason, tmpfailure)
          # Set the value of "deliverystatus" and "softbounce"
          o.deliverystatus = pseudocode

          if o.softbounce < 0
            # set the value of "softbounce" again when the value is -1
            softorhard = Sisimai::SMTP::Error.soft_or_hard(o.reason, pseudocode) || ''

            o.softbounce = if softorhard.size > 0
                             # Returned value is "soft" or "hard"
                             softorhard == 'soft' ? 1 : 0
                           else
                             # Returned value is an empty string
                             -1
                           end
          end
        end
      end

      unless o.replycode.empty?
        # Check both of the first digit of "deliverystatus" and "replycode"
        o.replycode = '' unless o.replycode[0, 1] == o.deliverystatus[0, 1]
      end

    end
    objectlist << o

  end
  return objectlist
end
new(argvs) click to toggle source

Constructor of Sisimai::Data @param [Hash] argvs Data @return [Sisimai::Data] Structured email data

# File lib/sisimai/data.rb, line 48
# Populate a Sisimai::Data object from a hash keyed by attribute name.
# Nothing is set when either address cannot be turned into a usable
# Sisimai::Address object.
#
# @param [Hash] argvs Data; reads 'addresser', 'recipient', 'timestamp' and
#                     the text attributes listed below
def initialize(argvs)
  # Create both email address objects before validating either of them
  sender = Sisimai::Address.make(argvs['addresser'])
  rcpt   = Sisimai::Address.make(address: argvs['recipient'])

  return nil unless sender.is_a?(Sisimai::Address) && rcpt.is_a?(Sisimai::Address)
  return nil if sender.void || rcpt.void

  @addresser    = sender
  @recipient    = rcpt
  @senderdomain = sender.host
  @destination  = rcpt.host
  @alias        = argvs['alias'] || ''
  @token        = Sisimai::String.token(sender.address, rcpt.address, argvs['timestamp'])
  @timestamp    = Sisimai::Time.parse(::Time.at(argvs['timestamp']).to_s)
  @timezoneoffset = argvs['timezoneoffset'] || '+0000'

  # Text attributes default to an empty string; "catch" alone defaults to nil
  %w[lhost rhost].each { |name| instance_variable_set("@#{name}", argvs[name] || '') }
  @catch = argvs['catch'] || nil
  %w[
    reason listid subject messageid smtpagent diagnosticcode diagnostictype
    deliverystatus smtpcommand feedbacktype action origin
  ].each { |name| instance_variable_set("@#{name}", argvs[name] || '') }

  # Fall back to a reply code found in the diagnostic text
  code = argvs['replycode'] || ''
  code = Sisimai::SMTP::Reply.find(argvs['diagnosticcode']).to_s if code.empty?
  @replycode  = code
  @softbounce = argvs['softbounce'] || ''
end

Public Instance Methods

damn() click to toggle source

Convert the object to a Hash @return [Hash] Data as a Hash

# File lib/sisimai/data.rb, line 363
# Convert the object to a Hash keyed by accessor name. Accessors whose value
# is nil or false are stored as an empty string. "addresser" and "recipient"
# are stored as their address value and "timestamp" as an integer, after all
# the other keys.
#
# @return [Hash] Data as a Hash
def damn
  converted = %w[addresser recipient timestamp]
  plain = @@rwaccessors.each_with_object({}) do |accessor, hash|
    key = accessor.to_s
    hash[key] = send(accessor) || '' unless converted.include?(key)
  end

  plain.merge(
    'addresser' => addresser.address,
    'recipient' => recipient.address,
    'timestamp' => timestamp.to_time.to_i
  )
end
Also aliased as: to_hash
dump(type = 'json') click to toggle source

Data dumper @param [String] type Data format: json, yaml @return [String, Nil] Dumped data, or nil if the value of the first argument is neither "json" nor "yaml"
# File lib/sisimai/data.rb, line 380
# Serialize this object with the dumper class of the given format
# (Sisimai::Data::JSON or Sisimai::Data::YAML).
#
# @param  [String] type Data format: "json" or "yaml"
# @return [String, nil] Dumped data, or nil if "type" is neither "json" nor "yaml"
def dump(type = 'json')
  return nil unless %w[json yaml].include?(type)
  referclass = "Sisimai::Data::#{type.upcase}"

  begin
    require referclass.downcase.gsub('::', '/')
  rescue LoadError
    # require raises LoadError, which is not a StandardError, so it has to be
    # named for this branch to run. Warn and carry on: the lookup below still
    # succeeds when the dumper class is already defined.
    warn "***warning: Failed to load #{referclass}"
  end

  Module.const_get(referclass).dump(self)
end
to_hash()
Alias for: damn
to_json(*) click to toggle source

JSON handler @return [String] JSON string converted from Sisimai::Data

# File lib/sisimai/data.rb, line 396
# JSON handler: any arguments are accepted and ignored, the result is
# whatever dump returns for the "json" format.
#
# @return [String] JSON string converted from Sisimai::Data
def to_json(*)
  dump('json')
end