class MMonitor::Crawler

Attributes

adapter[RW]
page[RW]
page_key[RW]
pages[RW]
params[RW]
total[RW]
url[RW]

Public Class Methods

new(url) click to toggle source
# File lib/mmonitor/crawler.rb, line 8
# Builds a crawler for the given listing URL.
#
# Seeds the paging state (first page, query key 'page') and then hands the
# URL to #process.
#
# @param url [String] address handed to #process
def initialize(url)
  self.page_key = 'page'
  self.page = 1
  process(url)
end

Public Instance Methods

items() click to toggle source
# File lib/mmonitor/crawler.rb, line 14
# Returns whatever the current adapter exposes through its own #items.
def items
  adapter.items
end

Private Instance Methods

html() click to toggle source
# File lib/mmonitor/crawler.rb, line 74
# Fetches the markup for the current url/params pair.
#
# The request goes through the adapter's own get_html when an adapter is
# set, and through Spider.get_html when it is not.
def html
  fetcher = adapter.nil? ? Spider : adapter
  fetcher.get_html(url, params)
end
next_page() click to toggle source
# File lib/mmonitor/crawler.rb, line 56
# Walks the remaining pages of the listing, then finalises the adapter.
#
# On every pass a progress line ("page/pages") and a separator are printed.
# While more pages remain, the page counter is advanced, the adapter's value
# for that page is stored in params under page_key, the freshly fetched
# markup is assigned to the adapter and the adapter processes it.
#
# Once the last page is reached the adapter's #extra hook runs, its body and
# item are cleared, and - unless total is nil - a line comparing the number
# of collected items with total is printed.
#
# Implemented as a loop rather than self-recursion so that the call stack
# does not grow with the number of pages.
#
# @return [nil]
def next_page
  loop do
    puts "分页提示:#{page}/#{pages}"
    puts '_' * 88
    break unless pages > page

    self.page += 1
    params[page_key] = adapter.page(page)
    adapter.body = html
    adapter.process
  end

  adapter.extra
  # Release the last page's markup and the adapter's current item.
  adapter.body = nil
  adapter.item = nil
  puts "产品差异:#{items.count}/#{total}" unless total.nil?
  nil
end
process(uri) click to toggle source

URL格式化

# File lib/mmonitor/crawler.rb, line 21
# Normalises the URL, picks the site-specific strategy and starts paging.
#
# The address is split into a base url (scheme, host, path) and a params
# hash taken from the query string (empty hash when there is no query).
# The strategy is chosen by substring match on the host and is constructed
# with the markup returned by #html; at that point the adapter being
# replaced is still in place, so #html decides the fetcher from it.
#
# @param uri [String] address to crawl
# @return [nil] the result of #next_page
# @raise [ArgumentError] when the host is missing or matches no strategy
#   (the host and a separator line are still printed first)
def process(uri)
  uri = Addressable::URI.parse(uri)
  # A URL without a host yields nil; treat it as an empty, unsupported host.
  host = uri.host.to_s
  self.url = "#{uri.scheme}://#{host}#{uri.path}"
  self.params = uri.query_values || {}

  self.adapter =
    case
    when host.include?('amazon.cn') then Strategies::Amazon.new(html)
    when host.include?('jd.com')    then Strategies::Jd.new(html)
    when host.include?('jumei.com') then Strategies::Jumei.new(html)
    when host.include?('lefeng.com') then Strategies::Lefeng.new(html)
    when host.include?('suning.com') then Strategies::Suning.new(html)
    when host.include?('tmall.com') then Strategies::Tmall.new(html)
    when host.include?('yhd.com')   then Strategies::Yhd.new(html, url)
    when host.include?('yixun.com') then Strategies::Yixun.new(html)
    else
      puts host
      puts '_' * 88
      # Fail here with a clear message instead of a NoMethodError on nil below.
      raise ArgumentError, "no crawling strategy for host #{host.inspect}"
    end

  # Paging information reported by the adapter.
  self.pages = adapter.pages
  self.total = adapter.total
  self.page_key = adapter.page_key
  next_page
end