module Parser

TODO: parse all pages first, then download only the newest ones afterwards.

Constants

VERSION

Public Class Methods

config() click to toggle source
# File lib/fly_parser.rb, line 98
# Reads and parses the YAML file located at CONFIG_PATH.
#
# The file is re-read on every call; nothing is cached here.
#
# Returns whatever YAML.load_file produces for that document.
def config
  path = CONFIG_PATH
  YAML.load_file(path)
end
connect(url) click to toggle source
# File lib/fly_parser.rb, line 47
# Fetches +url+ with a freshly created Mechanize agent.
#
# The agent's default pluggable parser is set to Mechanize::Page before the
# request is made.
#
# url - the address handed to the agent's +get+.
#
# Returns the result of the agent's +get+ call.
def connect(url)
  browser = Mechanize.new.tap do |agent|
    agent.pluggable_parser.default = Mechanize::Page
  end
  browser.get(url)
end
find_source() click to toggle source
# File lib/fly_parser.rb, line 102
# Picks the source to run from the configuration.
#
# Returns the first entry of config["sources"] whose "enabled" value is
# truthy, or nil when no entry qualifies.
def find_source
  sources = config["sources"]
  sources.find do |candidate|
    candidate["enabled"]
  end
end
http(url) click to toggle source

Get HTTP Source

# File lib/fly_parser.rb, line 43
# Downloads +url+ and parses the response with Nokogiri::HTML.
#
# URI.open is used instead of a bare +open+: since Ruby 3.0 open-uri no
# longer patches Kernel#open, so +open(url)+ would treat the URL as a local
# file path. The block form also closes the underlying handle once parsing
# has finished, which the previous version never did.
#
# NOTE(review): URI.open is provided by open-uri on Ruby 2.5 and later;
# confirm the minimum Ruby version this project supports.
#
# url - the address (String) to fetch.
#
# Returns the value produced by Nokogiri::HTML for the fetched content.
def http(url)
  URI.open(url) { |io| Nokogiri::HTML(io) }
end
init_parser(source) click to toggle source
# File lib/fly_parser.rb, line 110
# Dispatches to the enable_* method that matches the source's type.
#
# The method name is "enable_" followed by source["source"] with every dash
# replaced by an underscore; it is invoked through +send+ with +source+ as
# its only argument.
#
# source - a Hash-like object with a "source" key holding the type name.
#
# Returns whatever the dispatched method returns.
def init_parser(source)
  handler = "enable_" + source["source"].gsub('-', '_')
  send(handler, source)
end
parse_and_save(items) click to toggle source

Threads are slower in this case, so do not use them: we need a delay between requests (some sources ban clients that send many requests concurrently).

# File lib/fly_parser.rb, line 80
# Processes the items one after another, in order: announces the item via
# +ap+, parses it with parse_item, then hands the item and its parse result
# to save_item.
#
# items - a collection responding to +each+; each element is indexed with
#         'type' for the progress message.
#
# Returns the value of +items.each+.
def parse_and_save(items)
  items.each do |entry|
    ap "Parsing #{entry['type']}"
    save_item(entry, parse_item(entry))
  end
end
parse_item(item) click to toggle source
# File lib/fly_parser.rb, line 94
# Runs the parser attached to an item.
#
# item - a Hash-like object whose "parser" entry responds to +parse_all+.
#
# Returns the result of that parser's +parse_all+.
def parse_item(item)
  parser = item["parser"]
  parser.parse_all
end
save(articles, options) click to toggle source
# File lib/fly_parser.rb, line 53
# Builds an Article for every entry in +articles+ and saves the valid ones.
#
# Each article is assigned the category identified by options[:category_id]
# and its remote image URL is set from article[:poster_image]. Articles that
# fail +valid?+ are skipped. When article[:tags] is present, the records
# returned by +tags+ are appended to the saved article's tags.
#
# The category is looked up lazily and only once per call, instead of once
# per article; an empty +articles+ list triggers no lookup at all.
#
# articles - a collection of Hash-like objects read via :title, :content,
#            :poster_image and :tags.
# options  - a Hash-like object providing :category_id.
#
# Returns the value of +articles.each+.
def save(articles, options)
  category = nil

  articles.each do |article|
    item = Article.new(title: article[:title], content: article[:content])
    category ||= Category.find(options[:category_id])
    item.categories = [category]
    item.remote_image_url = article[:poster_image]
    next unless item.valid?

    item.save
    item.tags << tags(article[:tags]) if article[:tags]
  end
end
save_item(item, result) click to toggle source
# File lib/fly_parser.rb, line 88
# Stores the parse result of one item under that item's category.
#
# The category is found or created by a name that is the JSON encoding of
# {en: item["category"]}. A progress message with the category's
# localized_name is printed via +ap+, then the result is handed to
# Parser.save together with the category id.
#
# item   - a Hash-like object with a "category" entry.
# result - the parsed articles to pass on to Parser.save.
#
# Returns whatever Parser.save returns.
def save_item(item, result)
  name_json = JSON.generate(en: item["category"])
  category = Category.find_or_create_by!(name: name_json)
  ap "and save to #{category.localized_name} category"
  Parser.save(result, {category_id: category.id})
end
start() click to toggle source
# File lib/fly_parser.rb, line 70
# Entry point: prints the current time, picks the enabled source, initialises
# its parser and then parses and saves that source's items.
#
# When find_source returns nil (no source is enabled), there is nothing to
# do: a warning is written to stderr and the method returns nil, rather than
# failing later with a NoMethodError on nil.
#
# Returns the result of parse_and_save, or nil when no source is enabled.
def start
  puts Time.now

  source = find_source
  unless source
    warn "No enabled source found in the configuration; nothing to parse"
    return
  end

  init_parser(source)
  parse_and_save(source["items"])
end
tags(tags) click to toggle source
# File lib/fly_parser.rb, line 66
# Maps tag names to Tag records, finding or creating each one by title.
#
# tags - a collection of tag names responding to +map+.
#
# Returns the result of +map+: one Tag.find_or_create_by! result per name,
# in the same order.
def tags(tags)
  tags.map do |name|
    Tag.find_or_create_by!(title: name)
  end
end