module EmailCollector
Public Class Methods
collect(searchReq, domain = nil)
click to toggle source
# File lib/email_collector.rb, line 19
#
# Runs one search per configured keyword and merges everything found.
#
# Every entry of @keywords is appended to +searchReq+ (separated by a single
# space) and the resulting query is passed to collect_plain together with
# +domain+.
#
# @param searchReq [String] base search query
# @param domain [String, nil] forwarded unchanged to collect_plain
# @return [Array] the collect_plain results of all keywords, flattened into
#   one list with duplicates and nil entries removed
def self.collect(searchReq, domain = nil)
  per_keyword = @keywords.map do |keyword|
    query = "#{searchReq} #{keyword}"
    collect_plain(query, domain)
  end

  # collect_plain hands back nested arrays, so flatten all levels first.
  merged = per_keyword.flatten
  merged.uniq.compact
end
collect_plain(searchReq, domain = nil)
click to toggle source
# File lib/email_collector.rb, line 23
#
# Searches for +searchReq+ and extracts e-mail addresses from every result
# fragment returned by google_search.
#
# Without a +domain+ a single search is made and any address-shaped token is
# picked up. With a +domain+ two searches are made (one quoting the domain,
# one quoting "at <domain>"); each fragment is passed through
# filter_at_domain and only addresses at exactly that domain are picked up.
#
# @param searchReq [String] search query
# @param domain [String, nil] restricts the matches to this domain when given
# @return [Array<Array<String>>] one inner array of matches per result fragment
def self.collect_plain(searchReq, domain = nil)
  unless domain
    fragments = google_search(searchReq)
    any_domain = fragments.map do |context|
      @logger.debug("context = #{context}")
      context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@(?:[a-z0-9.-]+\.)+[a-z0-9]{2,}/i)
    end
    return any_domain
  end

  quoted_hits = google_search("#{searchReq} \"#{domain}\"")
  quoted_at_hits = google_search("#{searchReq} \"at #{domain}\"")
  combined = quoted_hits + quoted_at_hits

  combined.map do |context|
    normalized = filter_at_domain(context, domain)
    normalized.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@#{Regexp.quote(domain)}/i)
  end
end
filter_at(s)
click to toggle source
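Replaces an obfuscated “at” separator (for example “ at ”, “[at]” or “_at_”) with @, after collapsing runs of whitespace into a single space.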
# File lib/email_collector.rb, line 60
#
# Turns an obfuscated "at" separator into a literal "@".
#
# Runs of whitespace are first collapsed to a single space. Then "at" (or
# "et"), in any letter case, is replaced by "@" when it is either
# * surrounded by characters that cannot belong to an address, such as
#   spaces or brackets ("john [at] gmail.com"), or
# * wrapped in the same run of "_", "+" or "-" on both sides
#   ("john_at_gmail.com").
#
# @param s [String] text to clean up
# @return [String] a new string with the separators replaced
def self.filter_at(s)
  single_spaced = s.gsub(/\s+/, ' ')
  single_spaced.gsub(/[^a-z0-9_.%+-]+[ae]t[^a-z0-9.@-]+|([_+-]+)[ae]t\1/i, '@')
end
filter_at_domain(s, domain)
click to toggle source
Replaces the run of separator characters in front of the given domain with @, e.g. john***gmail.com becomes john@gmail.com.
# File lib/email_collector.rb, line 65
#
# Rewrites "<separator junk><domain>" as "@<domain>", e.g.
# "john***gmail.com" becomes "john@gmail.com".
#
# The match is case-insensitive. Without that, upper-case letters of the
# local part would count as separator characters and be swallowed
# ("JOHN gmail.com" would become "@gmail.com"), and a differently-cased
# domain ("GMAIL.COM") would not be recognised at all.
#
# @param s [String] text to clean up
# @param domain [String] domain to look for; matched literally and inserted
#   after the "@" exactly as given
# @return [String] a new string with the separators replaced
def self.filter_at_domain(s, domain)
  s.gsub(/[^a-z0-9_%+-]+#{Regexp.quote(domain)}/i, "@#{domain}")
end
filter_b(s)
click to toggle source
Strips the &lt;b&gt; and &lt;/b&gt; bold tags that the google-search gem leaves in result snippets.
# File lib/email_collector.rb, line 75
#
# Removes opening and closing bold tags (<b>, </b>) from +s+.
#
# Tag names are matched case-insensitively because HTML tag names are not
# case-sensitive, so <B> and </B> are removed as well. Other tags (for
# example <br>) are left alone.
#
# @param s [String] text that may contain bold markup
# @return [String] a new string without the bold tags
def self.filter_b(s)
  s.gsub(%r{</?b>}i, '')
end
filter_exclam(s)
click to toggle source
Transforms obfuscated addresses such as john@gmail!com into john@gmail.com by replacing every “!” and “:” with “.”.
# File lib/email_collector.rb, line 70
#
# Replaces every "!" and ":" in +s+ with ".", so that an obfuscated address
# such as "john@gmail!com" becomes "john@gmail.com".
#
# @param s [String] text to clean up
# @return [String] a new string with the characters replaced
def self.filter_exclam(s)
  # Plain character-for-character substitution, so String#tr is enough; the
  # single "." in the target set is reused for both source characters.
  s.tr('!:', '.')
end
google_search(searchReq)
click to toggle source
# File lib/email_collector.rb, line 44
#
# Runs a web search through Google::Search::Web and returns the cleaned-up
# text fragments of the results.
#
# The search is configured with +searchReq+ as its query and @size as its
# size. The content of every result item is passed through filter_b,
# filter_at and filter_exclam (in that order) and then split on "...".
#
# @param searchReq [String] search query; also written to the debug log
# @return [Array<String>] all fragments of all result items in one flat list
def self.google_search(searchReq)
  @logger.debug("searching for #{searchReq}")

  results = Google::Search::Web.new do |search|
    search.query = searchReq
    search.size = @size
  end

  fragments = results.map do |item|
    without_bold = filter_b(item.content)
    with_at_signs = filter_at(without_bold)
    filter_exclam(with_at_signs).split('...')
  end

  fragments.flatten
end
keywords=(k)
click to toggle source
# File lib/email_collector.rb, line 15
class << self
  # Sets the keyword list stored in @keywords. collect appends each entry
  # to the search request and runs one search per entry, so the value must
  # respond to +map+ (typically an Array of Strings).
  #
  # @param k [Array<String>] keywords to search with
  # @return [Array<String>] the assigned value
  attr_writer :keywords
end
size=(s)
click to toggle source
# File lib/email_collector.rb, line 9
class << self
  # Sets the value stored in @size, which google_search assigns to the
  # +size+ attribute of every Google::Search::Web query it builds.
  #
  # @param s [Object] size setting handed to the search object
  #   (presumably the number of results to request; confirm against the
  #   google-search gem)
  # @return [Object] the assigned value
  attr_writer :size
end