class PhishingSet
Creates a collection of phishing hashes, one per entry on a PhishTank listing page. Example: {
id: "4141251", url: "http://bintango.xyz/AIsaE", created_at: "added on Jun 3rd 2016 3:57 PM", submitter: "PhishReporter", valid: "Unknown", online: "ONLINE"
}
Attributes
all[R]
url[R]
Public Class Methods
new(url)
click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 21
# Fetches the listing page at +url+, parses it and scrapes its rows.
#
# Sets @url (the parsed URI), @page (the parsed HTML document) and
# @all (whatever scrape_parse returns for that document).
#
# @param url [String, URI] address of the listing page to scrape
def initialize(url)
  @url = URI(url)
  # URI.open (from open-uri) instead of Kernel#open: Kernel#open stopped
  # opening URLs in Ruby 3.0. The block form closes the response stream
  # (possibly a Tempfile) as soon as the document has been parsed.
  @page = URI.open(@url.to_s) { |io| Nokogiri::HTML(io) }
  @all = scrape_parse
end
Public Instance Methods
each() { |ph| ... }
click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 68
# Yields every scraped phishing hash in @all, in order.
#
# @yieldparam ph [Object] one element of @all
# @return [Object] @all when a block is given
# @return [Enumerator] over the same elements when no block is given
def each
  # Without a block the bare `yield` would raise LocalJumpError; hand back
  # an Enumerator instead, as Ruby's own collections do.
  return enum_for(:each) unless block_given?

  @all.each { |ph| yield ph }
end
first()
click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 59
# Returns the first scraped entry, or the first +n+ entries.
#
# @param n [Integer, nil] how many leading entries to return; when omitted
#   a single entry is returned instead of an array
# @return [Object, nil] the first element of @all when +n+ is omitted
#   (nil if @all is empty)
# @return [Array] the first +n+ elements of @all when +n+ is given
def first(n = nil)
  return @all.first if n.nil?

  @all.first(n)
end
page_at_id(id)
click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 63
# Estimates the page offset of submission +id+ relative to the newest entry
# held by this set, as (newest_id - id + 1) / per_page using integer
# division.
#
# NOTE(review): this presumably assumes ids are consecutive and listed
# newest-first; confirm against the site before relying on exact results.
#
# @param id [Integer, String] submission id; strings such as the scraped
#   :id values are converted with to_i
# @param per_page [Integer] number of entries per listing page
# @return [Integer] the computed page offset
# @raise [NoMethodError] if the set is empty (first returns nil)
def page_at_id(id, per_page: 20)
  newest_id = first[:id].to_i
  # Integer division already floors, so no extra rounding step is needed.
  (newest_id - id.to_i + 1) / per_page
end
scrape_detail(url)
click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 50
# Downloads a detail page and extracts the text found at
# div/div[3]/b inside the element with id "widecol".
#
# @param url [String, URI] absolute or relative link to the detail page;
#   relative links are resolved against @url
# @return [String] the extracted text ("" when the XPath matches nothing)
# @raise [NoMethodError] if the page has no "#widecol" element
def scrape_detail(url)
  # URI#merge returns absolute links unchanged and resolves relative ones
  # against the listing URL, keeping the port and avoiding the "//" that
  # manual string concatenation produces for hrefs starting with "/".
  target = @url.merge(url)
  # Block form of URI.open closes the response stream after parsing.
  detail_page = URI.open(target) { |io| Nokogiri::HTML(io) }
  detail_page.at('#widecol').at_xpath('div/div[3]/b/text()').to_s
end
scrape_parse()
click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 28
# Converts the rows of the ".data" element in @page into hashes with the
# keys :id, :url, :created_at, :submitter, :valid and :online. Every value
# is a stripped string ("" when the cell is missing).
#
# @return [Array<Hash>] one hash per row after the header row; an empty
#   array when the page has no ".data" element
def scrape_parse
  table = @page.at('.data')
  return [] unless table

  # drop(1) skips the header row without mutating the node set.
  table.search('tr').drop(1).map do |row|
    id = row.at_xpath('td[1]/a/text()').to_s.strip
    detail_href = row.at_xpath('td[1]/a/@href').to_s.strip
    url = row.at_xpath('td[2]/text()').to_s.strip
    # A trailing "..." marks an incomplete URL; fetch the full one from
    # the entry's detail page.
    url = scrape_detail(detail_href) if url.end_with?('...')

    {
      id: id,
      url: url,
      created_at: row.at_xpath('td[2]/span/text()').to_s.strip,
      submitter: row.at_xpath('td[3]/a/text()').to_s.strip,
      valid: row.at_xpath('td[4]/text()').to_s.strip,
      online: row.at_xpath('td[5]/strong/text()').to_s.strip
    }
  end
end