class Apollo::Crawler::SpiderCrawler
Public Instance Methods
extract_data(doc)
click to toggle source
# File lib/apollo_crawler/crawler/spider_crawler.rb, line 34
#
# Returns the data extracted from a document. This implementation always
# returns an empty array and does not inspect +doc+.
#
# @param doc [Object] the document to extract from (unused here)
# @return [Array] always an empty array
def extract_data(doc)
  []
end
extract_links(doc)
click to toggle source
# File lib/apollo_crawler/crawler/spider_crawler.rb, line 38
#
# Collects the targets of every <a> element in +doc+ as a list of unique
# link hashes. Each href is passed, together with this crawler's #url, to
# BaseCrawler.try_get_url, and the result is stringified.
#
# NOTE(review): presumably try_get_url returns nil when an href cannot be
# resolved -- confirm against BaseCrawler. Such anchors are skipped.
#
# @param doc [#xpath] document answering xpath("//a") with a collection
#   of nodes indexable by 'href'
# @return [Array<Hash>] unique hashes of the form { :link => String }
def extract_links(doc)
  links = doc.xpath("//a").map do |node|
    target = BaseCrawler.try_get_url(url, node['href'])

    # Test for nil BEFORE calling to_s: nil.to_s is "", which would slip
    # past a later nil check and produce a bogus { :link => "" } entry.
    next if target.nil?

    { :link => target.to_s }
  end

  # `next` leaves nil placeholders in the mapped array; remove them before
  # de-duplicating.
  links.compact.uniq
end
name()
click to toggle source
# File lib/apollo_crawler/crawler/spider_crawler.rb, line 26
#
# Name of this crawler.
#
# @return [String] the literal "Spider"
def name
  "Spider"
end
url()
click to toggle source
# File lib/apollo_crawler/crawler/spider_crawler.rb, line 30
#
# URL associated with this crawler. #extract_links passes it as the first
# argument to BaseCrawler.try_get_url alongside each anchor's href.
#
# @return [String] the literal "http://www.wikipedia.org/"
def url
  "http://www.wikipedia.org/"
end