class MMonitor::Crawler
Attributes
adapter[RW]
page[RW]
page_key[RW]
pages[RW]
params[RW]
total[RW]
url[RW]
Public Class Methods
new(url)
click to toggle source
# File lib/mmonitor/crawler.rb, line 8
# Creates a crawler for +url+.
#
# Seeds the paging defaults (first page, query key 'page') and then hands the
# URL to #process, which does the actual work.
#
# @param url [String] URL passed straight through to #process
def initialize(url)
  self.page, self.page_key = 1, 'page'
  process(url)
end
Public Instance Methods
items()
click to toggle source
# File lib/mmonitor/crawler.rb, line 14
# Returns the items held by the current adapter.
#
# @return [Object] whatever +adapter.items+ returns
def items
  adapter.items
end
Private Instance Methods
html()
click to toggle source
# File lib/mmonitor/crawler.rb, line 74
# Fetches the page for the current +url+ and +params+.
#
# When no adapter has been assigned yet the request goes through Spider;
# otherwise the adapter's own +get_html+ is used.
#
# @return [Object] the result of the chosen +get_html+ call
def html
  fetcher = adapter.nil? ? Spider : adapter
  fetcher.get_html(url, params)
end
next_page()
click to toggle source
# File lib/mmonitor/crawler.rb, line 56
# Walks the remaining pages of the listing, then finalizes the adapter.
#
# For every page after the current one: bumps +page+, stores
# +adapter.page(page)+ under +page_key+ in +params+, fetches the page with
# #html into +adapter.body+ and calls +adapter.process+. A progress line and a
# separator are printed before each step, including the final check.
#
# Once +page+ has reached +pages+ it calls +adapter.extra+, clears the
# adapter's +body+ and +item+, and prints the collected item count against
# +total+ (skipped when +total+ is nil).
#
# Implemented as a loop rather than self-recursion: Ruby does not eliminate
# tail calls, so one stack frame per page could overflow on long listings.
#
# @return [nil]
def next_page
  loop do
    puts "分页提示:#{page}/#{pages}"
    puts '_' * 88
    break unless pages > page

    self.page += 1
    params[page_key] = adapter.page(page)
    adapter.body = html
    adapter.process
  end

  adapter.extra
  # Drop the per-page state on the adapter now that crawling is finished.
  adapter.body = nil
  adapter.item = nil
  puts "产品差异:#{items.count}/#{total}" unless total.nil?
  nil
end
process(uri)
click to toggle source
URL格式化
# File lib/mmonitor/crawler.rb, line 21
# Normalizes +uri+, picks the site strategy matching its host, and crawls
# every page of the listing via #next_page.
#
# The URL is rebuilt from scheme, host and path only; the query string is
# kept separately in +params+ so the paging parameter can be rewritten for
# each request.
#
# @param uri [String] listing URL, parsed with Addressable::URI.parse
# @return [nil] the result of #next_page
# @raise [ArgumentError] when the host is missing or matches none of the
#   supported shops (previously this surfaced as a NoMethodError on nil)
def process(uri)
  parsed = Addressable::URI.parse(uri)
  host = parsed.host.to_s # a URL without a host is treated as unsupported
  self.url = "#{parsed.scheme}://#{host}#{parsed.path}"
  self.params = parsed.query_values || {}

  # #html is evaluated while +adapter+ is still unset, so this first fetch
  # goes through Spider. It is only triggered once a host has matched.
  self.adapter =
    if host.include?('amazon.cn')
      Strategies::Amazon.new(html)
    elsif host.include?('jd.com')
      Strategies::Jd.new(html)
    elsif host.include?('jumei.com')
      Strategies::Jumei.new(html)
    elsif host.include?('lefeng.com')
      Strategies::Lefeng.new(html)
    elsif host.include?('suning.com')
      Strategies::Suning.new(html)
    elsif host.include?('tmall.com')
      Strategies::Tmall.new(html)
    elsif host.include?('yhd.com')
      Strategies::Yhd.new(html, url)
    elsif host.include?('yixun.com')
      Strategies::Yixun.new(html)
    else
      # Keep the original diagnostic output, then fail with a clear error
      # instead of continuing with a nil adapter.
      puts host
      puts '_' * 88
      raise ArgumentError, "unsupported host: #{host.inspect}"
    end

  # Page count, item total and paging parameter name as reported by the strategy.
  self.pages = adapter.pages
  self.total = adapter.total
  self.page_key = adapter.page_key

  next_page
end