class Baiduserp::Analyser

Public Class Methods

new(name,attrs={}) click to toggle source

Dir[File.expand_path('../analyser/*.rb', __FILE__)].each{|f| require f}

# File lib/baiduserp/analyser.rb, line 11
# Sets up an analyser project called +name+.
#
# name  - used both as the working directory name and as the prefix of the
#         main SQLite file ("<name>.sqlite").
# attrs - options hash kept in @attrs; import_keywords reads attrs[:keywords]
#         as the default CSV path.
#
# Creates the working directory when missing, opens the main database, runs
# the default migrations, builds anonymous Sequel models for the keywords and
# weights tables, and imports keywords when the database file did not exist
# before this call.
def initialize(name,attrs={})
  @name = name
  # store htmls and serps data under the dir
  # (Dir.exists? was removed in Ruby 3.2; Dir.exist? works on all versions.)
  Dir.mkdir(@name) unless Dir.exist?(@name)

  @db_file = @name + ".sqlite"
  @attrs = attrs
  # Sampled before the database is opened; presumably opening it creates the
  # file, which would make this check always true afterwards.
  # (File.exists? was removed in Ruby 3.2 as well.)
  @keywords_imported = File.exist?(@db_file)

  @db = Sequel.sqlite(@db_file)

  migrate!

  @keywords = Class.new(Sequel::Model(@db[:keywords]))
  @weights = Class.new(Sequel::Model(@db[:weights]))

  import_keywords unless @keywords_imported
end

Public Instance Methods

_analyse_competitors(date=Date.today) click to toggle source
# File lib/baiduserp/analyser.rb, line 120
# Prints, as YAML on standard output, how many times each SEM site occurs
# across the SERPs that @serps holds for +date+.
#
# date - the date to filter @serps by (defaults to today).
#
# NOTE(review): @serps is not assigned in the initialize shown in this file;
# confirm where it is set before relying on this method.
def _analyse_competitors(date=Date.today)
  sites = Hash.new(0)
  @serps.where(:date => date).each do |row|
    content = row[:content]
    # The stored content is a serialised Ruby object (it must respond to
    # sem_sites). Psych 4+ (Ruby 3.1+) makes YAML.load a safe load that
    # rejects such objects, so use unsafe_load where it exists. The content
    # is presumably produced by this tool itself (see regenerate_serps);
    # do not point this at YAML from an untrusted source.
    serp = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(content) : YAML.load(content)
    serp.sem_sites.each {|site| sites[site] += 1}
  end
  puts YAML.dump(sites)
end
generate_weights(date=Date.today) click to toggle source
# File lib/baiduserp/analyser.rb, line 86
# Fills the weights table from every SERP stored for +date+.
#
# date - the date whose SERP store is read via model_serps (defaults to today).
#
# Each row of the SERP store carries a YAML-serialised SERP in :content. Every
# entry of that SERP's +weights+ list is stored through @weights, looked up by
# (date, keyword_id, side, side_rank). The remaining columns are assigned in
# the find_or_create block, which Sequel only runs when it has to create the
# record, so existing rows are left as they are.
#
# Returns whatever +each+ on the SERP model returns.
def generate_weights(date=Date.today)
  serps = model_serps(date)
  # Named +progress+ rather than +p+ so Kernel#p is not shadowed.
  progress = ProgressBar.create(:title => "Generating Weights", :total => serps.count, :format => '%t (%c/%C) %a %E |%w')
  serps.each do |s|
    keyword_id = s[:keyword_id]
    content = s[:content]
    # The stored content is a serialised Ruby object (it must respond to
    # weights). Psych 4+ (Ruby 3.1+) makes YAML.load a safe load that rejects
    # such objects, so use unsafe_load where it exists. The content is
    # presumably produced by this tool itself (see regenerate_serps); do not
    # point this at YAML from an untrusted source.
    serp = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(content) : YAML.load(content)

    serp.weights.each do |w|
      @weights.find_or_create(:date => date, :keyword_id => keyword_id, :side => w[:side], :side_rank => w[:side_rank]) do |r|
        r.type = w[:type]
        r.name = w[:name]
        r.site = w[:site]
        r.subdomain = w[:subdomain]
        r.url = w[:url]
        r.weight = w[:weight]
        r.part = w[:part]
        r.normalized_weight = w[:normalized_weight]
      end
    end
    progress.increment
  end
end
import_keywords(file=@attrs[:keywords]) click to toggle source
# File lib/baiduserp/analyser.rb, line 39
# Loads keywords from a CSV file into the keywords model.
#
# file - path of the CSV file (defaults to @attrs[:keywords]).
#
# Column 0 is the term used for the lookup; columns 1 and 2 become
# search_volume and category inside the find_or_create block.
def import_keywords(file=@attrs[:keywords])
  CSV.foreach(file) do |row|
    term, volume, group = row.values_at(0, 1, 2)
    @keywords.find_or_create(:term => term) do |record|
      record.search_volume = volume
      record.category = group
    end
  end
end
migrate!(db = @db, schema = 'weights') click to toggle source
# File lib/baiduserp/analyser.rb, line 34
# Brings +db+ up to date with one of the bundled migration sets.
#
# db     - database handle to migrate (defaults to @db).
# schema - name of the sub-directory of "migrations" (resolved relative to
#          this file) whose migrations are applied (defaults to 'weights').
#
# Loads Sequel's :migration and :core_extensions extensions first, then
# returns the result of Sequel::Migrator.apply.
def migrate!(db = @db, schema = 'weights')
  Sequel.extension(:migration, :core_extensions)
  migrations_dir = File.expand_path("../migrations/#{schema}/",__FILE__)
  Sequel::Migrator.apply(db, migrations_dir)
end
model_htmls(date=Date.today) click to toggle source
# File lib/baiduserp/analyser.rb, line 48
# Builds a model for the raw HTML store of +date+.
#
# date - the date whose store is opened (defaults to today).
#
# Opens "<@name>/htmls_<date>.sqlite", applies the 'htmls' migrations to it
# and returns a new anonymous Sequel model class over its :htmls table.
def model_htmls(date=Date.today)
  connection = Sequel.sqlite("#{@name}/htmls_#{date}.sqlite")
  migrate!(connection, 'htmls')
  dataset = connection[:htmls]
  Class.new(Sequel::Model(dataset))
end
model_serps(date=Date.today) click to toggle source
# File lib/baiduserp/analyser.rb, line 54
# Builds a model for the parsed SERP store of +date+.
#
# date - the date whose store is opened (defaults to today).
#
# Opens "<@name>/serps_<date>.sqlite", applies the 'serps' migrations to it
# and returns a new anonymous Sequel model class over its :serps table.
def model_serps(date=Date.today)
  connection = Sequel.sqlite("#{@name}/serps_#{date}.sqlite")
  migrate!(connection, 'serps')
  dataset = connection[:serps]
  Class.new(Sequel::Model(dataset))
end
regenerate_serps(date=Date.today) click to toggle source
# File lib/baiduserp/analyser.rb, line 72
# Re-parses every stored HTML page of +date+ and rewrites its SERP record.
#
# date - the date whose html and serp stores are used (defaults to today).
#
# For each html row, the serp record with the same keyword_id is found or
# created, and its :content is replaced by the YAML dump of
# Baiduserp.parse applied to the row's HTML. A progress bar is advanced once
# per row.
def regenerate_serps(date=Date.today)
  htmls = model_htmls(date)
  serps = model_serps(date)
  bar = ProgressBar.create(:title => "ReGenerating SERPS", :total => htmls.count, :format => '%t (%c/%C) %a %E |%w')
  htmls.each do |row|
    keyword_id, markup = row[:keyword_id], row[:content]
    record = serps.find_or_create(:keyword_id => keyword_id)
    record.update(:content => YAML.dump(Baiduserp.parse(markup)))

    bar.increment
  end
end
run(date=Date.today) click to toggle source
# File lib/baiduserp/analyser.rb, line 29
# Runs the pipeline for +date+ (defaults to today): calls search(date), then
# generate_weights(date), and returns what generate_weights returns.
#
# NOTE(review): search is not defined in the part of the file shown here;
# presumably it fetches and stores the SERPs that generate_weights reads —
# confirm against its definition.
def run(date=Date.today)
  search(date)
  generate_weights(date)
end