class Baiduserp::Analyser
Public Class Methods
new(name,attrs={})
click to toggle source
Dir[File.expand_path('../analyser/*.rb', __FILE__)].each{|f| require f}
# File lib/baiduserp/analyser.rb, line 11
#
# Prepares an analyser workspace identified by +name+.
#
# Creates a directory called +name+ when it does not exist yet, opens the
# SQLite database "<name>.sqlite", applies the default migrations and builds
# anonymous Sequel models for the +keywords+ and +weights+ tables. Keywords
# are imported only when the database file did not exist before this call.
#
# @param name  [String] base name used for the data directory and the
#   "<name>.sqlite" database file
# @param attrs [Hash] options; +:keywords+ is read by #import_keywords as
#   the default file to import
def initialize(name, attrs = {})
  @name = name
  # store htmls and serps data under the dir
  # (Dir.exist? replaces the deprecated Dir.exists?, removed in Ruby 3.2)
  Dir.mkdir(@name) unless Dir.exist?(@name)
  @db_file = @name + ".sqlite"
  @attrs = attrs
  # Record whether the database file was already there *before* it is
  # opened and migrated below, so the import only runs for a fresh database.
  @keywords_imported = File.exist?(@db_file)
  @db = Sequel.sqlite(@db_file)
  migrate!
  @keywords = Class.new(Sequel::Model(@db[:keywords]))
  @weights = Class.new(Sequel::Model(@db[:weights]))
  import_keywords unless @keywords_imported
end
Public Instance Methods
_analyse_competitors(date=Date.today)
click to toggle source
# File lib/baiduserp/analyser.rb, line 120
#
# Counts, across the SERP rows stored for +date+, how many times each entry
# of +sem_sites+ occurs, and prints the resulting site => count hash as YAML.
#
# NOTE(review): @serps is not assigned by the constructor documented for
# this class — confirm where it is expected to be set before calling this.
# NOTE(review): YAML.load deserialises stored content; fine for data this
# tool wrote itself, but verify it still loads the dumped SERP objects on
# Psych 4+, where YAML.load is restricted to safe types by default.
#
# @param date [Date] the day whose SERP rows are inspected
def _analyse_competitors(date = Date.today)
  occurrences = Hash.new(0)
  @serps.where(:date => date).each do |row|
    parsed = YAML.load(row[:content])
    parsed.sem_sites.each do |site|
      occurrences[site] += 1
    end
  end
  puts YAML.dump(occurrences)
end
generate_weights(date=Date.today)
click to toggle source
# File lib/baiduserp/analyser.rb, line 86
#
# Builds rows in the weights table from the SERPs stored for +date+.
#
# Every stored SERP is loaded from YAML and each entry of its +weights+ list
# is looked up by (date, keyword_id, side, side_rank); a row is created, and
# its remaining columns filled in, only when no such row exists yet. A
# progress bar is advanced once per SERP.
#
# NOTE(review): YAML.load deserialises stored content; verify it still loads
# the dumped SERP objects on Psych 4+, where YAML.load is safe by default.
#
# @param date [Date] the day whose SERPs are processed
def generate_weights(date = Date.today)
  serps = model_serps(date)
  bar = ProgressBar.create(:title => "Generating Weights",
                           :total => serps.count,
                           :format => '%t (%c/%C) %a %E |%w')
  # Columns copied verbatim from a weight entry when a new row is created.
  copied_columns = [:type, :name, :site, :subdomain, :url, :weight, :part, :normalized_weight]

  serps.each do |row|
    keyword_id = row[:keyword_id]
    serp = YAML.load(row[:content])
    serp.weights.each do |entry|
      identity = {
        :date => date,
        :keyword_id => keyword_id,
        :side => entry[:side],
        :side_rank => entry[:side_rank]
      }
      @weights.find_or_create(identity) do |record|
        copied_columns.each do |column|
          record.public_send("#{column}=", entry[column])
        end
      end
    end
    bar.increment
  end
end
import_keywords(file=@attrs[:keywords])
click to toggle source
# File lib/baiduserp/analyser.rb, line 39
#
# Imports keywords from a CSV file into the keywords table.
#
# The first column of each row is the term; a keyword row is created only
# when that term is not stored yet, taking search_volume from the second
# column and category from the third.
#
# @param file [String] path of the CSV file; defaults to the +:keywords+
#   entry of the attrs given to the constructor
def import_keywords(file = @attrs[:keywords])
  CSV.foreach(file) do |row|
    term, search_volume, category = row
    @keywords.find_or_create(:term => term) do |record|
      record.search_volume = search_volume
      record.category = category
    end
  end
end
migrate!(db = @db, schema = 'weights')
click to toggle source
# File lib/baiduserp/analyser.rb, line 34
#
# Applies the migrations found in "migrations/<schema>/" (resolved relative
# to this source file) to +db+.
#
# @param db     [Sequel::Database] database to migrate; defaults to @db
# @param schema [String] name of the migrations sub-directory to apply
def migrate!(db = @db, schema = 'weights')
  Sequel.extension :migration, :core_extensions
  migrations_dir = File.expand_path("../migrations/#{schema}/", __FILE__)
  Sequel::Migrator.apply(db, migrations_dir)
end
model_htmls(date=Date.today)
click to toggle source
# File lib/baiduserp/analyser.rb, line 48
#
# Opens "<name>/htmls_<date>.sqlite", applies the 'htmls' migrations to it
# and returns a new anonymous Sequel model class bound to its +htmls+ table.
#
# @param date [Date] day that selects the database file
# @return [Class] anonymous Sequel::Model subclass for the htmls table
def model_htmls(date = Date.today)
  connection = Sequel.sqlite("#{@name}/htmls_#{date}.sqlite")
  migrate!(connection, 'htmls')
  table = connection[:htmls]
  Class.new(Sequel::Model(table))
end
model_serps(date=Date.today)
click to toggle source
# File lib/baiduserp/analyser.rb, line 54
#
# Opens "<name>/serps_<date>.sqlite", applies the 'serps' migrations to it
# and returns a new anonymous Sequel model class bound to its +serps+ table.
#
# @param date [Date] day that selects the database file
# @return [Class] anonymous Sequel::Model subclass for the serps table
def model_serps(date = Date.today)
  connection = Sequel.sqlite("#{@name}/serps_#{date}.sqlite")
  migrate!(connection, 'serps')
  table = connection[:serps]
  Class.new(Sequel::Model(table))
end
regenerate_serps(date=Date.today)
click to toggle source
# File lib/baiduserp/analyser.rb, line 72
#
# Re-parses every HTML page stored for +date+ and overwrites the content of
# the matching SERP row (creating the row first when it is missing) with the
# YAML dump of the fresh parse result. A progress bar is advanced once per
# stored page.
#
# @param date [Date] the day whose stored pages are re-parsed
def regenerate_serps(date = Date.today)
  htmls = model_htmls(date)
  serps = model_serps(date)
  bar = ProgressBar.create(:title => "ReGenerating SERPS",
                           :total => htmls.count,
                           :format => '%t (%c/%C) %a %E |%w')

  htmls.each do |page|
    keyword_id = page[:keyword_id]
    markup = page[:content]
    record = serps.find_or_create(:keyword_id => keyword_id)
    record.update(:content => YAML.dump(Baiduserp.parse(markup)))
    bar.increment
  end
end
run(date=Date.today)
click to toggle source
# File lib/baiduserp/analyser.rb, line 29
#
# Runs the whole pipeline for +date+: first #search, then #generate_weights.
#
# @param date [Date] the day to process
def run(date = Date.today)
  search(date)
  generate_weights(date)
end
search(date=Date.today)
click to toggle source
Search keywords -> store HTML -> parse SERP
# File lib/baiduserp/analyser.rb, line 61
#
# Search keywords -> store HTML -> parse SERP.
#
# For every keyword, fetches the search result HTML unless a page for that
# keyword is already stored for +date+, then parses the stored HTML and saves
# the YAML dump of the result unless a SERP row already exists. A progress
# bar is advanced once per keyword.
#
# @param date [Date] the day that selects the per-day html/serp databases
def search(date = Date.today)
  htmls = model_htmls(date)
  serps = model_serps(date)
  # Count in the database instead of loading every keyword row just to
  # size the progress bar.
  bar = ProgressBar.create(:title => "Searching Keywords",
                           :total => @keywords.count,
                           :format => '%t (%c/%C) %a %E |%w')

  @keywords.each do |keyword|
    # find_or_create returns the existing or freshly created row, so it can
    # be reused below without querying the htmls table a second time.
    page = htmls.find_or_create(:keyword_id => keyword[:id]) do |record|
      record.content = Baiduserp.get_search_html(keyword[:term])
    end
    serps.find_or_create(:keyword_id => keyword[:id]) do |record|
      record.content = YAML.dump(Baiduserp.parse(page[:content]))
    end
    bar.increment
  end
end