class HatebloMixedContentsFinder::MixedContentsFinder
Constants
- VALIDATE_CONDITIONS
Attributes
entire_page[R]
Public Class Methods
new(entire_page: false)
click to toggle source
# File lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb, line 7 def initialize(entire_page: false) @entire_page = entire_page end
Public Instance Methods
validate_all(site_url, limit: 3, sleep_sec: 1)
click to toggle source
# File lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb, line 11 def validate_all(site_url, limit: 3, sleep_sec: 1) puts "Validate #{site_url} / entire_page: #{entire_page}, limit: #{limit || 'none'}" invalid_contents = [] archive_url = File.join(site_url, 'archive') agent = Mechanize.new next_page_link = nil counter = 0 catch(:exit_loop) do begin list_url = next_page_link ? next_page_link['href'] : archive_url puts "Validating #{list_url}" page = agent.get(list_url) links = page.search('.entry-title-link') links.each do |link| over_limit = limit && (counter += 1) > limit throw :exit_loop if over_limit url = link['href'] invalid_contents += validate_entry(url) sleep sleep_sec end next_page_link = page.search('.pager-next a')&.first end while next_page_link end invalid_contents end
validate_entry(url)
click to toggle source
# File lib/hateblo_mixed_contents_finder/mixed_contents_finder.rb, line 53 def validate_entry(url) puts "[#{Time.now.strftime("%H:%M:%S")}] Validate #{url}" agent = Mechanize.new page = agent.get(url) root = entire_page ? '' : '.entry-content' VALIDATE_CONDITIONS.flat_map do |tag, attr| validator = ElementValidator.new(tag, attr, root) validator.validate(page) end end