class ShopeeScrape::ShopeeListGoodsByCate
¶ ↑
Class For List products by category and search products with keyword
¶ ↑
Constants
- GOOD_INFO
- GOOD_NAME
- GOOD_NUM
- GOOD_PRICE
- GOOD_UPTIME
Public Class Methods
new(category=nil, page=nil)
click to toggle source
# File lib/shopee.rb, line 20 def initialize(category=nil, page=nil) if !category.nil? && !page.nil? get_page_html(category, page) end end
Public Instance Methods
get_cate_childs(cate=nil)
click to toggle source
# File lib/shopee.rb, line 34 def get_cate_childs(cate=nil) childs = [] if !cate.nil? CATE_TREE[cate].each do |k, v| childs << {k => v} end childs else childs end end
goods()
click to toggle source
# File lib/shopee.rb, line 26 def goods @goods ||= extract_goods end
search_keyword(category, keyword, list_num)
click to toggle source
# File lib/shopee.rb, line 30 def search_keyword(category, keyword, list_num) @similar ||= find_similiar_goods(category, keyword, list_num) end
Private Instance Methods
extract_goods()
click to toggle source
# File lib/shopee.rb, line 60 def extract_goods results = [] @document.each do |doc| name = [] price = [] num = [] update_time = [] pic = [] link = [] doc.xpath(GOOD_NAME).map do |good| name << good.text end doc.xpath(GOOD_PRICE).map do |good| price << good.text end doc.xpath(GOOD_NUM).map do |good| num << good.text end doc.xpath(GOOD_UPTIME).map do |good| update_time << good.text end doc.xpath(GOOD_INFO).map do |good| link << good.css('a').attribute('href')[0].to_s pic << good.css('img').attribute('src')[0].to_s end number = name.length # puts number if number > 32 number = 32 end for i in 2..number-1 element = {} element['name'] = name[i] element['price'] = price[i-1] element['num'] = num[i-1] element['update_time'] = update_time[i-1] element['link'] = link[i] element['pic'] = pic[i] results << element end end results end
find_similiar_goods(category, keyword, list_num)
click to toggle source
# File lib/shopee.rb, line 115 def find_similiar_goods(category, keyword, list_num) goods = [] goodsSet = [] for i in 1..5 shopeecate = ShopeeScrape::ShopeeListGoodsByCate.new(category, i) goodsSet << shopeecate.goods end goodsSet.each do |good| good.each do |g| goods << g end end jarow = FuzzyStringMatch::JaroWinkler.create( :native ) rank = {} goods.each do |good| value = jarow.getDistance(good['name'] ,keyword) rank[good['name']] = value end rank_after_sort = Hash[rank.sort_by{|k, v| v}.reverse] key = rank_after_sort.keys() results = [] for i in 0..list_num.to_i - 1 good_name = key[i] goods.each do |good| if good['name'] == good_name results << good break end end end results end
get_page_html(cate, page)
click to toggle source
# File lib/shopee.rb, line 48 def get_page_html(cate, page) @document = [] url = ALL_LINK[cate] if page != 1 @document << Oga.parse_html(open(url << '&p=' << page.to_s)) else @document << Oga.parse_html(open(url)) end end