module NotesStructuredTextJsonMessages
Constants
- BCC
- CC
- FROM
- INET_BCC
- INET_CC
- INET_FROM
- INET_TO
- IN_REPLY_TO
- MESSAGE_ID
- NOTES_US_DATE_FORMAT
- POSTED_DATE
- REFERENCES
- TO
Attributes
logger[RW]
mappings[RW]
stats[RW]
Public Instance Methods
collect_mapping(addr1, addr2)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 21
# Appends the pair [addr1, addr2] to the mappings list.
# Does nothing (and returns nil) when mappings is nil or false.
def collect_mapping(addr1, addr2)
  pairs = mappings
  return unless pairs

  pairs << [addr1, addr2]
end
extract_json_message(block, options={})
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 221
# Builds the message hash for one block of header lines.
#
# block   - the header lines of a single message, as consumed by
#           header_value / header_values / process_addresses
# options - passed through to parse_date and process_addresses
#
# Returns a Hash with the keys :message_type, :message_id, :sent_at,
# :in_reply_to, :references, :from, :to, :cc and :bcc.
#
# Raises RuntimeError, after bumping the matching :failure_* counter, when
# the message id or posted date header is missing, when there is not
# exactly one sender, or when to/cc/bcc together hold no recipients.
def extract_json_message(block, options={})
  message_id_h = header_value(block, MESSAGE_ID)
  unless message_id_h
    increment_stats(:failure_no_message_id)
    raise "no #{MESSAGE_ID}"
  end
  message_id = strip_angles(message_id_h)

  posted_date_h = header_value(block, POSTED_DATE)
  unless posted_date_h
    increment_stats(:failure_no_posted_date)
    raise "no #{POSTED_DATE}"
  end
  posted_date = parse_date(posted_date_h, options)

  # Both threading headers are optional; they stay nil when absent.
  in_reply_to_h = header_value(block, IN_REPLY_TO)
  in_reply_to = strip_angles(in_reply_to_h) if in_reply_to_h

  references_h = header_values(block, REFERENCES, " ")
  references = references_h.map { |r| strip_angles(r) } if references_h

  froms = process_addresses(block, INET_FROM, FROM, options)
  # Exactly one sender is required. An empty list (header present but with
  # no addresses) is rejected too, otherwise :from would silently be nil.
  if !froms || froms.size != 1
    increment_stats(:failure_from)
    raise "no From:, or more than one From:"
  end
  from = froms[0]

  to = process_addresses(block, INET_TO, TO, options)
  cc = process_addresses(block, INET_CC, CC, options)
  bcc = process_addresses(block, INET_BCC, BCC, options)

  if (to || []).size + (cc || []).size + (bcc || []).size == 0
    increment_stats(:failure_no_recipients)
    raise "no recipients"
  end

  { :message_type=>"email",
    :message_id=>message_id,
    :sent_at=>posted_date,
    :in_reply_to=>in_reply_to,
    :references=>references,
    :from=>from,
    :to=>to,
    :cc=>cc,
    :bcc=>bcc }
end
header_value(block, header)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 84
# Finds the first line of block that starts with "<header>: " (matched
# case-insensitively) and returns the rest of that line with surrounding
# whitespace stripped. Returns nil when no line matches.
def header_value(block, header)
  prefix = /^#{Regexp.quote(header)}: /i
  line = block.find { |candidate| candidate =~ prefix }
  return nil unless line

  line.gsub(prefix, '').strip
end
header_values(block, header, split_on=",")
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 90
# Looks up a header with header_value and splits it into stripped pieces.
#
# split_on - when a Symbol, the name of a method that is sent the header
#            string and returns the pieces; otherwise the argument handed
#            to String#split (defaults to ",").
#
# Returns an Array of stripped strings, or nil when the header is absent.
def header_values(block, header, split_on=",")
  raw = header_value(block, header)
  return nil unless raw

  pieces = split_on.is_a?(Symbol) ? send(split_on, raw) : raw.split(split_on)
  pieces.map { |piece| piece.strip }
end
increment_stats(key)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 17
# Adds one to the counter stored under key in stats, treating a missing
# (nil) entry as zero. Returns the new count.
def increment_stats(key)
  counters = stats
  previous = counters[key] || 0
  counters[key] = previous + 1
end
is_distinguished_name?(addr)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 80
# Returns true when addr contains the text "CN=" (case-sensitive),
# false otherwise.
def is_distinguished_name?(addr)
  (addr =~ /CN=/) ? true : false
end
is_message_block?(block)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 76
# Returns true when header_value finds a MESSAGE_ID header in block,
# false otherwise.
def is_message_block?(block)
  header_value(block, MESSAGE_ID) ? true : false
end
json_messages(output_dir, input_files, options={})
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 49
# Opens each input file for reading and hands the stream to
# json_messages_from_stream, all inside with_stats and with_mappings.
#
# output_dir  - passed through to json_messages_from_stream
# input_files - a single path or a list of paths
# options     - passed to with_mappings and json_messages_from_stream
def json_messages(output_dir, input_files, options={})
  with_stats do
    with_mappings(options) do
      [*input_files].each do |path|
        File.open(path, "r") { |stream| json_messages_from_stream(output_dir, stream, options) }
      end
    end
  end
end
json_messages_from_stream(output_dir, input, options={})
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 61
# Repeatedly pulls blocks from input with read_block and passes each one
# to process_block, stopping when read_block returns nil (or false).
def json_messages_from_stream(output_dir, input, options={})
  while (current = read_block(input))
    process_block(output_dir, current, options)
  end
end
log() { |logger| ... }
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 13
# Yields the logger to the given block when a logger is set; otherwise
# the block is not run and nil is returned.
def log
  sink = logger
  return unless sink

  yield sink
end
output_json_message(output_dir, json_message)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 268
# Writes json_message.to_json to "<output_dir>/<digest>.json", where the
# digest is MD5.hexdigest of json_message[:message_id]. The file is opened
# in "w" mode, so an existing file of the same name is overwritten.
def output_json_message(output_dir, json_message)
  digest = MD5.hexdigest(json_message[:message_id])
  path = File.join(output_dir, "#{digest}.json")
  File.open(path, "w") { |file| file << json_message.to_json }
end
parse_date(date, options={})
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 217
# Parses date with DateTime.strptime using NOTES_US_DATE_FORMAT and
# returns the resulting DateTime. The options argument is accepted but
# not consulted.
def parse_date(date, options={})
  format = NOTES_US_DATE_FORMAT
  DateTime.strptime(date, format)
end
process_address(addr)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 163
# Converts one address string into a Hash.
#
# A string recognised by is_distinguished_name? becomes
# {:notes_dn=>addr}, plus :name taken from the text after "CN=" up to the
# next "/". Anything else is parsed with TMail::Address.parse and becomes
# {:email_address=>downcased address}, plus :name when the parsed address
# has one.
#
# Returns nil, after logging a warning, when the parse result is not a
# TMail::Address.
def process_address(addr)
  if is_distinguished_name?(addr)
    name = addr[/CN=([^\/]*)/, 1]
    h = {:notes_dn=>addr}
    h[:name] = name if name
    h
  else
    ta = TMail::Address.parse(addr)
    if ta.is_a?(TMail::Address)
      h = {:email_address=>ta.address.downcase}
      h[:name] = ta.name if ta.name
      h
    else
      log { |logger| logger.warn("addr does not parse to a TMail::Address: #{addr}") }
      # Return nil explicitly: otherwise the result of log (whatever the
      # logger's warn returns) would leak out as the "address".
      nil
    end
  end
end
process_address_pair(inet_addr, notes_addr, options)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 181
# Processes one internet/Notes address pair.
#
# When inet_addr is exactly ".", only notes_addr is processed and its
# result returned. Otherwise both are run through process_address, the
# (notes, internet) pair is handed to collect_mapping, and the processed
# internet address is returned. The options argument is not consulted.
def process_address_pair(inet_addr, notes_addr, options)
  return process_address(notes_addr) if inet_addr == "."

  internet = process_address(inet_addr)
  notes = process_address(notes_addr)
  collect_mapping(notes, internet)
  internet
end
process_addresses(block, inet_field, notes_field, options)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 194
# Reads the internet and Notes variants of an address header (each split
# with :split_rfc822_addresses) and converts them to processed addresses.
#
# - both present: they must be the same length; the lists are zipped and
#   each pair goes through process_address_pair
# - only one present: each entry goes through process_address
# - neither present: returns nil
#
# Raises RuntimeError when both are present with different lengths.
def process_addresses(block, inet_field, notes_field, options)
  inet_list = header_values(block, inet_field, :split_rfc822_addresses)
  notes_list = header_values(block, notes_field, :split_rfc822_addresses)

  if inet_list && notes_list
    unless inet_list.length == notes_list.length
      raise "#{inet_field}: does not match #{notes_field}:"
    end

    inet_list.zip(notes_list).map do |inet_addr, notes_addr|
      process_address_pair(inet_addr, notes_addr, options)
    end
  else
    singles = inet_list || notes_list
    singles && singles.map { |addr| process_address(addr) }
  end
end
process_block(output_dir, block, options={})
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 134
# Handles one block read from the input.
#
# A block accepted by is_message_block? is converted with
# extract_json_message, written with output_json_message and counted
# under :message; any other block is counted under :non_message.
#
# A StandardError raised along the way is counted under :failed_message
# and logged together with the block's lines, so one bad message does not
# stop the run. Non-StandardError exceptions (Interrupt, SystemExit,
# NoMemoryError, ...) are deliberately not rescued and propagate.
def process_block(output_dir, block, options={})
  if is_message_block?(block)
    json_message = extract_json_message(block, options)
    output_json_message(output_dir, json_message)
    increment_stats(:message)
  else
    increment_stats(:non_message)
  end
rescue StandardError => e
  increment_stats(:failed_message)
  log do |logger|
    logger.error(e)
    logger.error(block.join("\n"))
  end
end
read_block(input)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 66
# Reads lines from input up to the next empty line or end of input.
#
# Returns nil when input is already at end of file; otherwise an Array of
# the chomped, non-empty lines read (which is empty when the first line
# read is itself blank). The terminating blank line is consumed.
def read_block(input)
  return nil if input.eof?

  lines = []
  loop do
    line = input.readline.chomp
    break if line.empty?

    lines << line
    break if input.eof?
  end
  lines
end
split_rfc822_addresses(header)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 101
# Splits a header string on commas, ignoring commas that appear inside a
# double-quoted string. Inside quotes a backslash makes the following
# character literal, so an escaped quote does not close the string.
#
# Pieces are returned unstripped. Empty pieces between commas are kept,
# but an empty final piece is dropped.
def split_rfc822_addresses(header)
  parts = []
  current = ""
  in_quotes = false
  escaped = false

  header.each_char do |ch|
    if escaped
      # Character following a backslash inside quotes: always literal.
      current << ch
      escaped = false
      next
    end

    if in_quotes
      escaped = true if ch == '\\'
      in_quotes = false if ch == '"'
      current << ch
    elsif ch == ','
      parts << current
      current = ""
    else
      in_quotes = true if ch == '"'
      current << ch
    end
  end

  parts << current unless current.empty?
  parts
end
strip_angles(value)
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 130
# Returns a copy of value in which every "<...>" group is replaced by the
# text between the angle brackets.
def strip_angles(value)
  value.gsub(/<([^>]*)>/) { $1 }
end
with_mappings(options) { || ... }
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 25
# Resets mappings to an empty list, runs the given block, and afterwards
# (even if the block raised) writes the collected mappings to
# options[:mapping_file] when that option is set.
#
# The file holds "[", one to_json rendering per mapping joined by ",\n",
# and a closing "]". Returns the block's value.
def with_mappings(options)
  mapping_file = options[:mapping_file]
  if mapping_file
    log { |l| l.info "using mapping_file: #{mapping_file}" }
  end
  self.mappings = []
  yield
ensure
  if mapping_file
    File.open(mapping_file, "w") do |output|
      output << "[\n" << mappings.map { |pair| pair.to_json }.join(",\n") << "\n]"
    end
  end
end
with_stats() { || ... }
click to toggle source
# File lib/notes_structured_text_json_messages.rb, line 40
# Resets stats to an empty Hash, runs the given block, and afterwards
# (even if the block raised) logs every "key: value" pair at info level.
# Returns the block's value.
def with_stats
  self.stats = {}
  yield
ensure
  stats.each do |name, count|
    log { |sink| sink.info("#{name}: #{count}") }
  end
end