class Apollo::Crawler::HackerNewsCrawler
Public Instance Methods
extract_data(doc)
click to toggle source
# File lib/apollo_crawler/crawler/hacker_news_crawler.rb, line 36
#
# Builds one record per node matched by the class-level @@MATCHER_ITEM XPath.
#
# doc - object responding to #xpath (presumably a parsed Nokogiri document;
#       confirm against the caller).
#
# Returns an Array of Hashes shaped { :text => String, :link => String }.
# Nodes whose href cannot be resolved are left out of the result.
def extract_data(doc)
  items = doc.xpath(@@MATCHER_ITEM).map do |node|
    # The href is resolved against this crawler's base URL.
    target = BaseCrawler.try_get_url(url, node['href'])

    # Test for nil BEFORE stringifying: nil.to_s is "", which is never nil,
    # so checking after #to_s would let unresolved links through as "".
    # NOTE(review): assumes try_get_url returns nil on failure - confirm.
    next if target.nil?

    { :text => node.text, :link => target.to_s }
  end

  # `next` leaves a nil placeholder in the mapped array; drop those.
  items.compact
end
extract_links(doc)
click to toggle source
# File lib/apollo_crawler/crawler/hacker_news_crawler.rb, line 50
#
# Collects follow-up links from the last anchor found inside a
# <td class="title"> cell.
#
# doc - object responding to #xpath (presumably a parsed Nokogiri document;
#       confirm against the caller).
#
# Returns an Array of unique Hashes shaped { :link => String }.
# Nodes whose href cannot be resolved are left out of the result.
def extract_links(doc)
  links = doc.xpath("(//td[@class = 'title']/a)[last()]").map do |node|
    # The href is resolved against this crawler's base URL.
    target = BaseCrawler.try_get_url(url, node['href'])

    # Test for nil BEFORE stringifying: nil.to_s is "", which is never nil,
    # so checking after #to_s would let unresolved links through as "".
    # NOTE(review): assumes try_get_url returns nil on failure - confirm.
    next if target.nil?

    { :link => target.to_s }
  end

  # `next` leaves a nil placeholder in the mapped array; drop those first,
  # then de-duplicate.
  links.compact.uniq
end
name()
click to toggle source
# File lib/apollo_crawler/crawler/hacker_news_crawler.rb, line 28
#
# Returns the display name of this crawler as a String.
def name
  "Hacker News"
end
url()
click to toggle source
# File lib/apollo_crawler/crawler/hacker_news_crawler.rb, line 32
#
# Returns the base URL of the crawled site as a String. The extract_*
# methods resolve each node's href against this value.
def url
  "http://news.ycombinator.com/"
end