module NHKore::CLI::SearchCmd
@author Jonathan Bradley Whited
@since 0.3.0
Public Instance Methods
build_search_cmd()
click to toggle source
# File lib/nhkore/cli/search_cmd.rb, line 25
#
# Define the +search+ command tree on +@app_cmd+:
#
#   search
#   |- easy    -> bing
#   `- regular -> bing
#
# The created commands are stored in +@search_cmd+, +@search_easy_cmd+,
# +@search_regular_cmd+ and +@search_bing_cmd+.
#
# NOTE(review): this source was recovered from a collapsed listing, so the
# line breaks inside the heredocs below are reconstructed -- confirm against
# the real file if the exact whitespace matters.
def build_search_cmd
  # The DSL blocks below reach the application through this local instead of
  # +self+ (presumably because the DSL evaluates them with a different
  # +self+ -- TODO confirm against the Cri documentation).
  app = self

  @search_cmd = @app_cmd.define_command do
    name 'search'
    usage 'search [OPTIONS] [COMMAND]...'
    aliases :se,:sea
    summary "Search for links to NHK News Web (Easy) (aliases: #{app.color_alias('se sea')})"

    description <<-DESC
      Search for links (using a Search Engine, etc.) to NHK News Web (Easy) &
      save to folder: #{SearchLinks::DEFAULT_DIR}
    DESC

    # '--in': the value is passed through app.check_empty_opt().
    option :i,:in,<<-DESC,argument: :required,transform: lambda { |value|
      file to read instead of URL (for offline testing and/or slow internet;
      see '--show-*' options)
    DESC
      app.check_empty_opt(:in,value)
    }
    # '--out': the value is passed through app.check_empty_opt().
    option :o,:out,<<-DESC,argument: :required,transform: lambda { |value|
      'directory/file' to save links to; if you only specify a directory or a file, it will attach
      the appropriate default directory/file name
      (defaults: #{SearchLinks::DEFAULT_YASASHII_FILE}, #{SearchLinks::DEFAULT_FUTSUU_FILE})
    DESC
      app.check_empty_opt(:out,value)
    }
    # '--results': converted with to_i and raised to 1 if it is less than 1.
    option :r,:results,'number of results per page to request from search',argument: :required,
      default: SearchScraper::DEFAULT_RESULT_COUNT,transform: lambda { |value|
        value = value.to_i
        value = 1 if value < 1
        value
      }
    option nil,:'show-count',<<-DESC
      show the number of links scraped and exit;
      useful for manually writing/updating scripts (but not for use in a variable);
      implies '--dry-run' option
    DESC
    option nil,:'show-urls',<<-DESC
      show the URLs -- if any -- used when searching & scraping and exit;
      you can download these for offline testing and/or slow internet
      (see '--in' option)
    DESC

    # Bare 'search' only prints help; any option whose name contains 'show'
    # is rejected here because it needs a sub command.
    run do |opts,args,cmd|
      opts.each do |key,value|
        key = key.to_s

        if key.include?('show')
          raise CLIError,"must specify a sub command for option[#{key}]"
        end
      end

      puts cmd.help
    end
  end

  @search_easy_cmd = @search_cmd.define_command do
    name 'easy'
    usage 'easy [OPTIONS] [COMMAND]...'
    aliases :e,:ez
    summary "Search for NHK News Web Easy (Yasashii) links (aliases: #{app.color_alias('e ez')})"

    description <<-DESC
      Search for NHK News Web Easy (Yasashii) links &
      save to file: #{SearchLinks::DEFAULT_YASASHII_FILE}
    DESC

    # Without a search-engine sub command, run_search_help() decides between
    # handling the '--show-*' options and printing help.
    run do |opts,args,cmd|
      app.refresh_cmd(opts,args,cmd)
      app.run_search_help
    end
  end

  @search_regular_cmd = @search_cmd.define_command do
    name 'regular'
    usage 'regular [OPTIONS] [COMMAND]...'
    aliases :r,:reg
    summary "Search for NHK News Web Regular (Futsuu) links (aliases: #{app.color_alias('r reg')})"

    description <<-DESC
      Search for NHK News Web Regular (Futsuu) links &
      save to file: #{SearchLinks::DEFAULT_FUTSUU_FILE}
    DESC

    # Same behavior as the 'easy' command's run block.
    run do |opts,args,cmd|
      app.refresh_cmd(opts,args,cmd)
      app.run_search_help
    end
  end

  # Defined standalone (not via define_command) so that a copy can be attached
  # to both 'easy' and 'regular' below.
  @search_bing_cmd = Cri::Command.define do
    name 'bing'
    usage 'bing [OPTIONS] [COMMAND]...'
    aliases :b
    summary "Search bing.com for links (aliases: #{app.color_alias('b')})"

    description <<-DESC
      Search bing.com for links &
      save to folder: #{SearchLinks::DEFAULT_DIR}
    DESC

    # The parent command's name (as a Symbol) is passed as the NHK type.
    run do |opts,args,cmd|
      app.refresh_cmd(opts,args,cmd)
      app.run_search_cmd(cmd.supercommand.name.to_sym,:bing)
    end
  end

  # dup()/clone() must be called for `cmd.supercommand` to work appropriately.
  @search_easy_cmd.add_command @search_bing_cmd.dup
  @search_regular_cmd.add_command @search_bing_cmd.dup
end
run_search_cmd(nhk_type,search_type)
click to toggle source
# File lib/nhkore/cli/search_cmd.rb, line 137
#
# Scrape search-result pages for links and either print the newly found
# links (dry run) or save all links to the output file.
#
# Reads its settings from +@cmd_opts+ (+:in+, +:out+, +:results+,
# +:dry_run+, +:show_count+, +:show_urls+).
#
# @param nhk_type [Symbol] +:easy+/+:yasashii+ or +:regular+/+:futsuu+
# @param search_type [Symbol,nil] +:bing+; +nil+ only gets as far as the
#   '--show-count' report (run_search_help() passes +nil+)
# @raise [ArgumentError] if +nhk_type+ or +search_type+ is not supported
def run_search_cmd(nhk_type,search_type)
  # Sub command names are aliases of the internal type names.
  nhk_type = {easy: :yasashii,regular: :futsuu}.fetch(nhk_type,nhk_type)

  # '--show-urls' only prints the URLs; nothing else to do in that case.
  return if show_search_urls(search_type)

  # '--show-count' forces '--dry-run'.
  @cmd_opts[:dry_run] = true if @cmd_opts[:show_count]

  build_in_file(:in)

  default_filename =
    case nhk_type
    when :futsuu then SearchLinks::DEFAULT_FUTSUU_FILENAME
    when :yasashii then SearchLinks::DEFAULT_YASASHII_FILENAME
    else raise ArgumentError,"invalid nhk_type[#{nhk_type}]"
    end

  build_out_file(:out,default_dir: SearchLinks::DEFAULT_DIR,default_filename: default_filename)

  return unless check_in_file(:in,empty_ok: true)
  return unless check_out_file(:out)

  show_count = @cmd_opts[:show_count]
  dry_run = @cmd_opts[:dry_run]
  in_file = @cmd_opts[:in]
  out_file = @cmd_opts[:out]
  requested = @cmd_opts[:results]
  result_count = requested.nil? ? SearchScraper::DEFAULT_RESULT_COUNT : requested

  start_spin("Scraping #{search_type}") unless show_count

  is_file = !in_file.nil?
  url = in_file # nil will use default URL, else a file
  new_links = [] # For --dry-run
  next_page = NextPage.new
  page_count = 0
  page_num = 1

  # Load previous links for 'scraped?' vars.
  links = File.exist?(out_file) ? SearchLinks.load_file(out_file) : SearchLinks.new
  links_count = links.length

  if show_count
    scraped_count = links.links.values.count { |link| link.scraped? }

    puts "#{scraped_count} of #{links_count} links scraped."

    return
  end

  case search_type
  # Anything that extends SearchScraper.
  when :bing
    # Bounded (0 through 10,000) to prevent an infinite loop; ichiman!
    0.upto(10_000) do
      scraper =
        case search_type
        when :bing
          BingScraper.new(nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
        else
          raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
        end

      next_page = scraper.scrape(links,next_page)

      # Everything past the previous length was added by this page.
      new_links.concat(links.links.values[links_count..-1])
      links_count = links.length
      page_count = next_page.count if next_page.count.positive?

      update_spin_detail(
        " (page=#{page_num}, count=#{page_count}, links=#{links.length}, new_links=#{new_links.length})"
      )

      break if next_page.empty?

      page_num += 1
      url = next_page.url

      sleep_scraper
    end
  else
    raise ArgumentError,"invalid search_type[#{search_type}]"
  end

  stop_spin
  puts
  puts 'Last URL scraped:'
  puts "> #{url}"
  puts

  if dry_run
    # Dry run: only list what was found; the output file is left alone.
    new_links.each { |link| puts link.to_s(mini: true) }
  else
    links.save_file(out_file)

    puts 'Saved scraped links to file:'
    puts "> #{out_file}"
  end
end
run_search_help()
click to toggle source
# File lib/nhkore/cli/search_cmd.rb, line 256
#
# Entry point for a type command ('easy'/'regular') that was given no
# search-engine sub command.
#
# With '--show-count' or '--show-urls' set in +@cmd_opts+, delegates to
# run_search_cmd() using the current command's name as the NHK type and
# +nil+ as the search type; otherwise prints the current command's help.
def run_search_help
  unless @cmd_opts[:show_count] || @cmd_opts[:show_urls]
    puts @cmd.help
    return
  end

  run_search_cmd(@cmd.name.to_sym,nil)
end
show_search_urls(search_type)
click to toggle source
# File lib/nhkore/cli/search_cmd.rb, line 264
#
# Handle the '--show-urls' option: print the search URLs that would be used
# for the given search engine.
#
# The result count comes from +@cmd_opts[:results]+, falling back to
# SearchScraper::DEFAULT_RESULT_COUNT when it is +nil+.
#
# @param search_type [Symbol,nil] the search engine; only +:bing+ is handled
# @return [Boolean] +false+ if '--show-urls' is not set (nothing printed),
#   +true+ after printing the URLs
# @raise [CLIError] if '--show-urls' is set but +search_type+ is not +:bing+
def show_search_urls(search_type)
  return false unless @cmd_opts[:show_urls]

  requested = @cmd_opts[:results]
  count = requested.nil? ? SearchScraper::DEFAULT_RESULT_COUNT : requested

  unless search_type == :bing
    raise CLIError,'must specify a sub command for option[show-urls]'
  end

  puts 'Bing:'
  puts "> Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE,count: count)}"
  puts "> Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE,count: count)}"

  true
end