module Parser
TODO: parse all pages first, and download only the newest ones later.
Constants
- VERSION
Public Class Methods
config()
click to toggle source
# File lib/fly_parser.rb, line 98
# Loads the parser configuration from the YAML file located at CONFIG_PATH.
# The file is read again on every call; nothing is cached here.
#
# @return [Object] whatever YAML.load_file produces for that file
def config
  path = CONFIG_PATH
  YAML.load_file(path)
end
connect(url)
click to toggle source
# File lib/fly_parser.rb, line 47
# Requests +url+ through a freshly created Mechanize agent whose default
# pluggable parser is set to Mechanize::Page.
#
# @param url [String] address to fetch
# @return [Object] the result of Mechanize#get for +url+
def connect(url)
  browser = Mechanize.new.tap do |agent|
    agent.pluggable_parser.default = Mechanize::Page
  end
  browser.get(url)
end
find_source()
click to toggle source
# File lib/fly_parser.rb, line 102
# Picks the first entry under the "sources" key of the configuration whose
# "enabled" value is truthy.
#
# @return [Object, nil] the first enabled source, or nil when none is enabled
def find_source
  sources = config["sources"]
  sources.find { |candidate| candidate["enabled"] }
end
http(url)
click to toggle source
Get HTTP Source
# File lib/fly_parser.rb, line 43
# Get HTTP source: opens +url+ and hands the stream to Nokogiri::HTML.
#
# URI.open (from open-uri) is used instead of a bare +open+, because
# Kernel#open stopped accepting URLs in Ruby 3.0. The block form closes the
# stream as soon as parsing is done instead of leaving it open.
#
# @param url [String] address (or local path) of the document to parse
# @return [Object] the value returned by Nokogiri::HTML
def http(url)
  URI.open(url) { |stream| Nokogiri::HTML(stream) }
end
init_parser(source)
click to toggle source
# File lib/fly_parser.rb, line 110
# Dispatches to the "enable_<type>" method matching the source's "source"
# value (dashes are replaced by underscores) and passes the source to it.
#
# @param source [Hash] source entry; its "source" value names the type
# @return [Object] whatever the invoked enable_* method returns
def init_parser(source)
  type_name = source["source"].gsub('-', '_')
  handler = "enable_#{type_name}"
  send(handler, source)
end
logo()
click to toggle source
# File lib/fly_parser.rb, line 106
# Reads the whole file located at LOGO_PATH.
#
# @return [String] the file's contents
def logo
  File.open(LOGO_PATH) { |file| file.read }
end
parse_and_save(items)
click to toggle source
Threads are slower in this case, so don’t use them: we need a delay between requests (some sources ban you for making a lot of requests concurrently).
# File lib/fly_parser.rb, line 80
# Walks through +items+ one after another: announces the item's "type",
# parses the item and stores the parsed result.
# Processing is sequential; no threads are started here.
#
# @param items [Enumerable] item entries (each responds to [] with string keys)
# @return [Object] the value of items.each
def parse_and_save(items)
  items.each do |entry|
    ap("Parsing #{entry['type']}")
    save_item(entry, parse_item(entry))
  end
end
parse_item(item)
click to toggle source
# File lib/fly_parser.rb, line 94
# Runs the parser stored under the item's "parser" key.
#
# @param item [Hash] item entry holding an object that responds to parse_all
# @return [Object] the result of that object's parse_all
def parse_item(item)
  parser = item["parser"]
  parser.parse_all
end
save(articles, options)
click to toggle source
# File lib/fly_parser.rb, line 53
# Builds an Article from every parsed entry, attaches the category and the
# poster image URL, and stores it; entries that cannot be saved are skipped.
#
# @param articles [Enumerable<Hash>] entries with :title, :content,
#   :poster_image and optionally :tags
# @param options [Hash] must provide :category_id
# @return [Object] the value of articles.each
def save(articles, options)
  category = nil
  articles.each do |article|
    item = Article.new(title: article[:title], content: article[:content])
    # The category is the same for every article, so look it up only once
    # (and not at all when there is nothing to save).
    category ||= Category.find(options[:category_id])
    item.categories = [category]
    item.remote_image_url = article[:poster_image]
    # Assumes an ActiveRecord-style save that validates and returns false on
    # failure, which makes a separate valid? call redundant and keeps tags
    # from being attached to a record that was not stored.
    next unless item.save

    item.tags << tags(article[:tags]) if article[:tags]
  end
end
save_item(item, result)
click to toggle source
# File lib/fly_parser.rb, line 88
# Finds or creates the Category whose name is the JSON document
# {"en": <item's "category">}, announces it, and saves +result+ under it.
#
# @param item [Hash] item entry providing the "category" value
# @param result [Object] parsed articles handed over to Parser.save
# @return [Object] whatever Parser.save returns
def save_item(item, result)
  category_name = JSON.generate(en: item["category"])
  category = Category.find_or_create_by!(name: category_name)
  ap("and save to #{category.localized_name} category")
  Parser.save(result, { category_id: category.id })
end
start()
click to toggle source
# File lib/fly_parser.rb, line 70
# Entry point: prints the current time, picks the enabled source,
# initializes its parser and then parses and saves the source's "items".
#
# @raise [RuntimeError] when find_source yields no source
# @return [Object] whatever parse_and_save returns
def start
  puts Time.now
  source = find_source
  # Without this guard a missing source surfaces later as a NoMethodError
  # on nil, which hides the real cause.
  raise "No enabled source found in the parser configuration" unless source

  init_parser(source)
  parse_and_save(source["items"])
end