class IML::IMDB
IMDB
scraping
Attributes
doc[RW]
result[RW]
Public Class Methods
new(query)
click to toggle source
Start an IMDB query.
@return [Array<Media::Movie, Media::TVSeries>] Array of Media objects
# File lib/iml/imdb.rb, line 10
# Run an IMDB title search for +query+.
#
# The query is URL-escaped, the IMDB "find" page is downloaded and parsed
# with Nokogiri into @doc, @result is reset to an empty array and #search
# is invoked.
#
# @param query [String] free-text search term
def initialize(query)
  escaped_query = CGI.escape(query)
  url = "https://www.imdb.com/find?q=#{escaped_query}&ref_=nv_sr_fn"
  # URI.open rather than Kernel#open: since Ruby 3.0 Kernel#open no longer
  # opens URLs, open-uri only provides URI.open.
  html = URI.open(url, 'X-Forwarded-For' => '35.228.112.200').read
  @doc = Nokogiri::HTML(html)
  @result = []
  search
end
Private Instance Methods
fetch_type(elem)
click to toggle source
# File lib/iml/imdb.rb, line 32
# String form of the third child of +elem+ with surrounding whitespace
# removed.
#
# @param elem [#children] node whose children can be indexed
# @return [String] stripped text; empty when there is no third child
def fetch_type(elem)
  third_child = elem.children[2]
  third_child.to_s.strip
end
game?(elem)
click to toggle source
# File lib/iml/imdb.rb, line 44
# Tells whether the type text of +elem+ mentions "Video Game".
#
# @param elem [#children] search result node
# @return [MatchData, false] the match when present, otherwise false
def game?(elem)
  game_match = /Video Game/.match(fetch_type(elem))
  game_match || false
end
href(elem)
click to toggle source
# File lib/iml/imdb.rb, line 52
# The href attribute of the second child of +elem+.
#
# @param elem [#children] search result node
# @return [Object] whatever the second child reports for :href
def href(elem)
  link = elem.children[1]
  link.attr(:href)
end
movie?(elem)
click to toggle source
# File lib/iml/imdb.rb, line 40
# Tells whether the type text of +elem+ contains a parenthesised
# four-digit year.
#
# @param elem [#children] search result node
# @return [MatchData, false] the match (with a +year+ group) or false
def movie?(elem)
  year_match = /\((?<year>\d{4})\)/.match(fetch_type(elem))
  year_match || false
end
parsable_element(elem)
click to toggle source
# File lib/iml/imdb.rb, line 20
# Checks that +elem+ has a second child, that a title can be taken from it
# (first <i> node or the second child's own child), and that the second
# child's href contains "title".
#
# @param elem [#children, #css] search result node
# @return [Integer, nil, false] position of "title" in the href when all
#   checks pass, otherwise a falsy value
def parsable_element(elem)
  link = elem.children[1]
  return link unless link

  title_source = elem.css('i').first || link.child.to_s
  return title_source unless title_source

  link.attr(:href) =~ /title/
end
parse_title(elem)
click to toggle source
# File lib/iml/imdb.rb, line 28
# Title for +elem+: the result of title_first_choice when it yields one,
# otherwise the string form of the second child's own child.
#
# @param elem [#children, #css] search result node
# @return [String] the title text
def parse_title(elem)
  preferred = title_first_choice(elem)
  return preferred if preferred

  elem.children[1].child.to_s
end
processable_elements()
click to toggle source
# File lib/iml/imdb.rb, line 48
# Nodes matching '.result_text' in @doc that pass parsable_element and are
# not flagged by game?. The selection is computed once and kept in
# @processable_elements.
#
# @return [Array] the selected nodes
def processable_elements
  return @processable_elements if @processable_elements

  @processable_elements = @doc.css('.result_text').select do |node|
    parsable_element(node) && !game?(node)
  end
end
search()
click to toggle source
# File lib/iml/imdb.rb, line 60
# Builds a media object for every processable element and appends it to
# result: IML::TVSeries when tv? matches, IML::Movie when movie? matches.
# Elements matching neither are skipped.
#
# @return [Array] the processable elements that were iterated
def search
  processable_elements.each do |elem|
    # Decide the media class before building attrs, so entries that match
    # neither pattern are skipped rather than pushed as nil; this also
    # avoids asking year for text that carries no year.
    media_class = if tv?(elem)
                    IML::TVSeries
                  elsif movie?(elem)
                    IML::Movie
                  end
    next unless media_class

    attrs = { title: parse_title(elem), href: href(elem), year: year(elem) }
    result.push media_class.new(attrs)
  end
end
title_first_choice(elem)
click to toggle source
# File lib/iml/imdb.rb, line 24
# Text of the first <i> node inside +elem+ with double quotes removed.
#
# @param elem [#css] search result node
# @return [String, nil] the unquoted text, or nil when there is no <i> node
def title_first_choice(elem)
  italic = elem.css('i').first
  return italic unless italic

  italic.child.to_s.delete('"')
end
tv?(elem)
click to toggle source
# File lib/iml/imdb.rb, line 36
# Tells whether the type text of +elem+ contains a parenthesised
# four-digit year followed by "(TV Series)".
#
# @param elem [#children] search result node
# @return [MatchData, false] the match (with a +year+ group) or false
def tv?(elem)
  series_match = /\((?<year>\d{4})\) \(TV Series\)/.match(fetch_type(elem))
  series_match || false
end
year(elem)
click to toggle source
# File lib/iml/imdb.rb, line 56
# Four-digit year found in parentheses in the type text of +elem+.
#
# @param elem [#children] search result node
# @return [String, nil] the year digits, or nil when the text has no
#   parenthesised four-digit year
def year(elem)
  # String#[] with a capture name yields nil on no match, where calling
  # named_captures on a failed match would raise NoMethodError.
  fetch_type(elem)[/\((?<year>\d{4})\)/, 'year']
end