module Serper::Helper
Public Class Methods
get_content_safe(noko)
click to toggle source
Safely get the stripped text content from a Nokogiri search result; returns nil when the result is nil or empty.
# File lib/serper/helper.rb, line 7
# Returns the whitespace-stripped content of the first element of a
# search result.
#
# noko - a collection responding to +empty?+ and +first+ whose elements
#        respond to +content+ (per the module docs, a Nokogiri search result).
#
# Returns nil when +noko+ is nil, empty, or its first element is nil;
# otherwise the stripped content string.
def get_content_safe(noko)
  return nil if noko.nil? || noko.empty?

  first_node = noko.first
  # Guard the element itself so a nil entry cannot raise NoMethodError.
  return nil if first_node.nil?

  first_node.content.strip
end
normalize(data,weight_col=:weight,normalized_col=:normalized_weight)
click to toggle source
Normalize the weights of the given data. The data must be an array of hashes, for example: [{a: 1, b: 2}, {a: 2, b: 3}]
# File lib/serper/helper.rb, line 27
# Adds a normalized weight to every hash in +data+, in place.
#
# data           - an array of hashes; each entry's weight is read from
#                  +weight_col+ and coerced with +to_f+.
# weight_col     - key holding the raw weight (default :weight).
# normalized_col - key that receives weight / total (default
#                  :normalized_weight).
#
# When the weights sum to zero, every normalized value is set to 0.0 rather
# than dividing by zero (which would yield NaN or Infinity).
#
# Returns the same +data+ array, with each hash mutated.
def normalize(data, weight_col = :weight, normalized_col = :normalized_weight)
  total_weight = data.reduce(0.0) { |sum, d| sum + d[weight_col].to_f }

  data.each do |d|
    d[normalized_col] =
      if total_weight.zero?
        0.0
      else
        d[weight_col].to_f / total_weight
      end
  end

  data
end
parse_data_click(str)
click to toggle source
Parse the data click value from a Baidu div property, which is in a JSON-like format.
# File lib/serper/helper.rb, line 15
# Converts a JSON-like string into a parsed JSON value.
#
# The input is rewritten in two steps before being handed to JSON.parse:
#   1. every single quote becomes a double quote;
#   2. a bare key (letters, digits, underscores) that directly follows
#      "{" or "," and directly precedes ":" is wrapped in double quotes.
#
# Raises whatever JSON.parse raises when the rewritten text is not valid JSON.
def parse_data_click(str)
  double_quoted = str.gsub("'", '"')
  keys_quoted = double_quoted.gsub(/({|,)([a-zA-Z0-9_]+):/, '\1"\2":')

  # Alternative substitutions that were left disabled in the original:
  #.gsub(/'*([a-zA-Z0-9_]+)'*:/, '"\1":')
  #.gsub(/:'([^(',\")]*)'(,|})/,':"\1"\2')
  JSON.parse(keys_quoted)
end
parse_path(url)
click to toggle source
# File lib/serper/helper.rb, line 63
# Extracts the path component of +url+ using Domainatrix.
#
# url - anything convertible with +to_s+.
#
# Returns the parsed path, or an empty string when parsing fails. On
# failure the original URL, the error class and the error message are
# printed to standard output.
#
# Only StandardError is rescued, so interrupts and exit requests still
# propagate instead of being silently turned into an empty result.
def parse_path(url)
  Domainatrix.parse(url.to_s).path
rescue StandardError => e
  puts "parse_path from url error:"
  puts url
  puts e.class
  puts e.message
  ''
end
parse_site(url)
click to toggle source
# File lib/serper/helper.rb, line 35
# Builds the site name of +url+ as "<domain>.<public_suffix>" using
# Domainatrix.
#
# url - anything convertible with +to_s+.
#
# Returns the joined site string, or an empty string when parsing or
# joining fails. On failure the original URL, the error class and the
# error message are printed to standard output.
#
# Only StandardError is rescued, so interrupts and exit requests still
# propagate instead of being silently turned into an empty result.
def parse_site(url)
  parsed = Domainatrix.parse(url.to_s)
  parsed.domain + '.' + parsed.public_suffix
rescue StandardError => e
  puts "parse_site from url error:"
  puts url
  puts e.class
  puts e.message
  ''
end
parse_subdomain(url)
click to toggle source
# File lib/serper/helper.rb, line 49
# Extracts the subdomain of +url+ using Domainatrix.
#
# url - anything convertible with +to_s+.
#
# Returns the parsed subdomain, or an empty string when parsing fails. On
# failure the original URL, the error class and the error message are
# printed to standard output.
#
# Only StandardError is rescued, so interrupts and exit requests still
# propagate instead of being silently turned into an empty result.
def parse_subdomain(url)
  Domainatrix.parse(url.to_s).subdomain
rescue StandardError => e
  puts "parse_subdomain from url error:"
  puts url
  puts e.class
  puts e.message
  ''
end