class NewsScraper::Trainer::UrlTrainer
Public Class Methods
new(url)
click to toggle source
# File lib/news_scraper/trainer/url_trainer.rb, line 4 def initialize(url) @url = url @root_domain = URIParser.new(@url).host @payload = Extractors::Article.new(url: @url).extract end
Public Instance Methods
train()
click to toggle source
# File lib/news_scraper/trainer/url_trainer.rb, line 10 def train return if NewsScraper.configuration.scrape_patterns['domains'].key?(@root_domain) CLI.put_header(@root_domain) CLI.log("There is no scrape pattern defined for #{@root_domain}") CLI.log "Fetching information..." CLI.put_footer selected_presets = {} NewsScraper.configuration.scrape_patterns['data_types'].each do |data_type| selected_presets[data_type] = selected_pattern(data_type) end save_selected_presets(selected_presets) end
Private Instance Methods
build_domain_yaml(selected_presets)
click to toggle source
# File lib/news_scraper/trainer/url_trainer.rb, line 54 def build_domain_yaml(selected_presets) spacer = " " output_string = ["#{spacer}#{@root_domain}:"] selected_presets.each do |data_type, spec| if spec.include?('variable') output_string << (spacer * 2) + "#{data_type}: *#{spec['variable']}" else output_string << (spacer * 2) + "#{data_type}:" spec.each { |k, v| output_string << (spacer * 3) + "#{k}: #{v}" } end end output_string.join("\n") end
preset_selector()
click to toggle source
# File lib/news_scraper/trainer/url_trainer.rb, line 39 def preset_selector @preset_selector ||= PresetSelector.new(url: @url, payload: @payload) end
save_selected_presets(selected_presets)
click to toggle source
# File lib/news_scraper/trainer/url_trainer.rb, line 43 def save_selected_presets(selected_presets) return unless NewsScraper.configuration.scrape_patterns_filepath current_content = File.read(NewsScraper.configuration.scrape_patterns_filepath).chomp new_content = "#{current_content}\n#{build_domain_yaml(selected_presets)}\n" File.write(NewsScraper.configuration.scrape_patterns_filepath, new_content) CLI.log("Successfully wrote presets for #{@root_domain} to"\ " #{NewsScraper.configuration.scrape_patterns_filepath}.") end
selected_pattern(data_type)
click to toggle source
# File lib/news_scraper/trainer/url_trainer.rb, line 27 def selected_pattern(data_type) CLI.put_header("Determining information for #{data_type}") pattern = if NewsScraper.configuration.scrape_patterns['presets'][data_type].nil? CLI.log("No presets were found for #{data_type}. Skipping to next.") nil else preset_selector.select(data_type) end CLI.put_footer pattern || { 'method' => "<<<<< TODO >>>>>", 'pattern' => "<<<<< TODO >>>>>" } end