class PhishingSet

Creates a collection of phishing hashes for each PhishTank page. Example: {

id: "4141251",
url: "http://bintango.xyz/AIsaE",
created_at: "added on Jun 3rd 2016 3:57 PM",
submitter: "PhishReporter",
valid: "Unknown",
online: "ONLINE"

}

Attributes

all[R]
url[R]

Public Class Methods

new(url) click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 21
# Downloads the listing page at +url+, parses it and stores the scraped
# entries.
#
# url:: String (or URI) locating the listing page.
#
# Sets @url (the parsed URI), @page (the parsed HTML document) and @all
# (the result of #scrape_parse).
def initialize(url)
  @url = URI(url)

  # Kernel#open stopped fetching URLs in Ruby 3.0, so go through the URI's
  # own #open (mixed in by open-uri for http/https/ftp URIs). Anything
  # else is treated as a local path, which is what Kernel#open did for
  # scheme-less strings. The block form closes the handle once parsed.
  @page =
    if @url.respond_to?(:open)
      @url.open { |io| Nokogiri::HTML(io) }
    else
      File.open(@url.to_s) { |io| Nokogiri::HTML(io) }
    end
  @all = scrape_parse
end

Public Instance Methods

each() { |ph| ... } click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 68
# Yields every scraped entry hash in @all, in order.
#
# With a block, returns @all (the value of Array#each). Without a block,
# returns an Enumerator over the entries instead of raising LocalJumpError.
def each(&block)
  @all.each(&block)
end
first() click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 59
# Returns the first entry of @all, or nil when nothing was scraped.
def first
  @all[0]
end
page_at_id(id) click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 63
# Computes a page offset for submission +id+ from its distance to the id
# of the first entry in this set.
#
# id:: numeric submission id.
#
# Returns (first_id - id + 1) / 20, rounded.
# NOTE(review): for Integer ids the division already truncates, so the
# rounding only has an effect when +id+ is a Float. Whether the "+ 1" is
# intended could not be confirmed from this file.
def page_at_id(id)
  per_page = 20 # 20 items per page
  latest_id = first[:id].to_i
  distance = latest_id - id + 1
  (distance / per_page).round
end
scrape_detail(url) click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 50
# Fetches a detail page and extracts the text used as the complete URL of
# an entry (see #scrape_parse, which calls this for truncated URLs).
#
# url:: absolute or relative reference to the detail page; relative
#       references are resolved against @url.
#
# Returns the text found at div/div[3]/b under the #widecol element, or an
# empty string when that node is absent.
def scrape_detail(url)
  # URI#merge performs standard reference resolution, so the port of @url
  # is kept and a leading "/" in +url+ does not produce a double slash.
  detail_uri = @url.merge(url)

  # Kernel#open stopped fetching URLs in Ruby 3.0; use the URI's own #open
  # (provided by open-uri). The block form closes the handle once parsed.
  detail_page = detail_uri.open { |io| Nokogiri::HTML(io) }
  detail_page.at("#widecol").at_xpath("div/div[3]/b/text()").to_s
end
scrape_parse() click to toggle source
# File lib/phishtank_scraper/phishing_set.rb, line 28
# Converts the rows of the '.data' element of @page into entry hashes.
#
# The first row is discarded as the header. Each remaining row yields a
# hash with the keys :id, :url, :created_at, :submitter, :valid and
# :online, all stripped strings. When the listed URL ends in "..." it is
# treated as truncated and replaced by the result of #scrape_detail on the
# row's detail link.
#
# Returns the collection of entry hashes.
def scrape_parse
  table_rows = @page.at('.data').search('tr')
  table_rows.shift # drop the header row

  table_rows.map do |row|
    # Stripped string form of the node at +xpath+ ("" when nothing matches).
    text_at = ->(xpath) { row.at_xpath(xpath).to_s.strip }

    entry_id = text_at.call('td[1]/a/text()')
    detail_href = text_at.call('td[1]/a/@href')

    listed_url = text_at.call('td[2]/text()')
    # A trailing ellipsis marks an incomplete URL; fetch the full one.
    listed_url = scrape_detail(detail_href) if listed_url.end_with?("...")

    {
      id: entry_id,
      url: listed_url,
      created_at: text_at.call('td[2]/span/text()'),
      submitter: text_at.call('td[3]/a/text()'),
      valid: text_at.call('td[4]/text()'),
      online: text_at.call('td[5]/strong/text()')
    }
  end
end