class StaticImageDownloader::Parser
Constants
- PARSER_OPTIONS
Attributes
content[RW]
extracted_links[RW]
images[RW]
parse_option[RW]
url[RW]
user_agent[RW]
Public Class Methods
default_parse_option()
click to toggle source
# File lib/static_image_download/parser.rb, line 36
#
# Class-level reader (listed under "Public Class Methods") that exposes the
# @@DEFAULTPARSEOPTION class variable, the value +initialize+ uses when no
# parse option is supplied.
def default_parse_option
  @@DEFAULTPARSEOPTION
end
default_path()
click to toggle source
# File lib/static_image_download/parser.rb, line 44
#
# Class-level reader (listed under "Public Class Methods") that exposes the
# @@DEFAULTPATH class variable, the value +initialize+ uses when no path is
# supplied.
def default_path
  @@DEFAULTPATH
end
default_timeout()
click to toggle source
# File lib/static_image_download/parser.rb, line 48
#
# Class-level reader (listed under "Public Class Methods") that exposes the
# @@DEFAULTTIMEOUT class variable, the value +initialize+ uses when no
# timeout is supplied.
def default_timeout
  @@DEFAULTTIMEOUT
end
default_user_agent()
click to toggle source
# File lib/static_image_download/parser.rb, line 40
#
# Class-level reader (listed under "Public Class Methods") that exposes the
# @@DEFAULTUSERAGENT class variable, the value +initialize+ uses when no
# user agent is supplied.
def default_user_agent
  @@DEFAULTUSERAGENT
end
new(url=@@DEFAULTSITE, path=@@DEFAULTPATH, parse_option=@@DEFAULTPARSEOPTION, timeout=@@DEFAULTTIMEOUT, user_agent=@@DEFAULTUSERAGENT, h={})
click to toggle source
# File lib/static_image_download/parser.rb, line 21
#
# Builds a parser for a single page. Every positional argument falls back to
# the matching class-level default when +nil+ is passed explicitly.
#
# url          - address of the page to scan (read back by get_url).
# path         - forwarded to Images.new by push_image.
# parse_option - key looked up in PARSER_OPTIONS by option_to_method.
# timeout      - limit handed to Timeout::timeout in parse_images.
# user_agent   - value sent as the User-Agent header by get_url.
# h            - accepted for signature compatibility; not used here.
def initialize(url=@@DEFAULTSITE, path=@@DEFAULTPATH, parse_option=@@DEFAULTPARSEOPTION, timeout=@@DEFAULTTIMEOUT, user_agent=@@DEFAULTUSERAGENT, h={})
  @url          = url.nil? ? @@DEFAULTSITE : url
  @user_agent   = user_agent.nil? ? @@DEFAULTUSERAGENT : user_agent
  @path         = path.nil? ? @@DEFAULTPATH : path
  @timeout      = timeout.nil? ? @@DEFAULTTIMEOUT : timeout
  @parse_option = parse_option.nil? ? @@DEFAULTPARSEOPTION : parse_option

  @images          = []
  @extracted_links = []

  # Absolute http/https/ftp URI (dotted-quad or host name, optional
  # credentials and port) whose path ends in one of Images::IMAGE_EXT.
  @rgxp_img_uri = Regexp.new(/^(http|https|ftp)\:\/\/([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)?((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.[a-zA-Z]{2,4})(\:[0-9]+)?(\/[^\/][a-zA-Z0-9\.\,\?\'\\\/\+&%\$#\=~_\-@]*)\.(#{Images::IMAGE_EXT.join('|')})/i)
  #@rgxp_img_uri = Regexp.new(/^(((http|https|ftp)\:\/\/)|www|(\/\/))([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)?((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.[a-zA-Z]{2,4})(\:[0-9]+)?(\/[^\/][a-zA-Z0-9\.\,\?\'\\\/\+&%\$#\=~_\-@]*)\.(#{Images::IMAGE_EXT.join('|')})/i)

  # Parse the resolved @url rather than the raw argument: an explicit nil url
  # has already been replaced by the default above, and URI.parse(nil) raises.
  @domain  = URI.parse(@url).host
  @content = nil
end
Public Instance Methods
collect_images()
click to toggle source
# File lib/static_image_download/parser.rb, line 116
#
# Hands every entry of @extracted_links to push_image, in order.
# Returns @extracted_links (the receiver of +each+).
def collect_images
  @extracted_links.each { |image_link| push_image(image_link) }
end
get_content_raw()
click to toggle source
# File lib/static_image_download/parser.rb, line 69
#
# Reads the whole body of the stream returned by get_url, collapses every run
# of newlines, carriage returns and tabs into a single space, and stores the
# result in @content.
#
# The stream is closed once it has been read, even if the read raises.
#
# Returns the normalised content String.
def get_content_raw
  stream = get_url
  begin
    raw = stream.read
  ensure
    stream.close if stream.respond_to?(:close)
  end

  # Non-destructive gsub: gsub! returns nil when nothing was replaced, which
  # made the return value depend on whether the page contained whitespace.
  @content = raw.gsub(/[\n\r\t]+/, ' ')
  #p @content if $debug_option
end
get_extracted_links(links)
click to toggle source
# File lib/static_image_download/parser.rb, line 93
#
# Appends to @extracted_links the image URI found in each entry of +links+,
# skipping URIs that are already recorded.
#
# links - an enumerable of Strings, or of objects answering +[:src]+ (their
#         +src+ value is converted with +to_s+); nil/false is tolerated.
#
# Only the portion of each link matched by @rgxp_img_uri is stored, so
# anything after the image extension is dropped.
#
# Returns false when +links+ is nil/false, otherwise whatever +links.each+
# returns.
def get_extracted_links(links)
  return false unless links

  links.each do |link|
    p "link= #{link}" if $debug_option
    link = link[:src].to_s unless link.is_a?(String)

    # Run the (large) pattern once per link instead of up to three times.
    image_uri = link[@rgxp_img_uri]
    next if image_uri.nil? || @extracted_links.include?(image_uri)

    @extracted_links << image_uri
  end
  #p "extracted_links= #{@extracted_links}" if $debug_option
end
get_url()
click to toggle source
# File lib/static_image_download/parser.rb, line 75
#
# Opens +url+ with the configured User-Agent header and returns the resulting
# IO-like object (the caller is responsible for reading and closing it).
#
# Prefers URI.open when it is available: since Ruby 3.0 a bare Kernel#open no
# longer opens URLs, and Kernel#open also treats a leading "|" as a command
# to run. Older Rubies without URI.open fall back to the open-uri-patched
# Kernel#open, as before.
def get_url
  request_headers = { 'User-Agent' => user_agent }

  if URI.respond_to?(:open)
    URI.open(url, request_headers)
  else
    open(url, request_headers)
  end
end
img_parse_hpricot(h={})
click to toggle source
# File lib/static_image_download/parser.rb, line 84
#
# Parses @content with Hpricot, selects the nodes matching "//img" and passes
# them to get_extracted_links.
#
# h - accepted for a uniform parser signature; not used here.
#
# Returns the result of get_extracted_links.
def img_parse_hpricot(h={})
  img_nodes = Hpricot(@content).search("//img")
  get_extracted_links(img_nodes)
end
img_parse_nokogiri(h={})
click to toggle source
# File lib/static_image_download/parser.rb, line 79
#
# Parses @content with Nokogiri::HTML, selects the nodes matching "//img" and
# passes them to get_extracted_links.
#
# h - accepted for a uniform parser signature; not used here.
#
# Returns the result of get_extracted_links.
def img_parse_nokogiri(h={})
  img_nodes = Nokogiri::HTML(@content).search("//img")
  get_extracted_links(img_nodes)
end
img_parse_uri_extract(h={})
click to toggle source
# File lib/static_image_download/parser.rb, line 89
#
# Pulls every URI out of @content with URI.extract, keeps those matching the
# image-URI pattern, and passes them to get_extracted_links.
#
# h - accepted for a uniform parser signature; not used here.
#
# Returns the result of get_extracted_links.
def img_parse_uri_extract(h={})
  image_pattern = /#{@rgxp_img_uri}/
  candidates = URI.extract(@content).select { |candidate| candidate[image_pattern] }
  get_extracted_links(candidates)
end
method_to_value(option, h={})
click to toggle source
# File lib/static_image_download/parser.rb, line 57
#
# Resolves +option+ to a method name via option_to_method and invokes that
# method on self with +h+.
#
# option - key understood by option_to_method.
# h      - passed through as the single argument to the resolved method.
#
# Returns the method's result, or "" when that result is nil/false. If the
# call raises a StandardError the error is printed and nil is returned.
# Errors raised while resolving the method name are not rescued.
def method_to_value(option, h={})
  handler = option_to_method(option)
  p "method= #{handler}" if $debug_option

  begin
    send(handler, h) || ""
  rescue => failure
    p "method_to_value.error = #{failure}"
    nil
  end
end
option_to_method(option)
click to toggle source
# File lib/static_image_download/parser.rb, line 53
#
# Looks +option+ up in PARSER_OPTIONS.
#
# Returns whatever PARSER_OPTIONS holds for that key (method_to_value passes
# it to +send+).
def option_to_method(option)
  PARSER_OPTIONS[option]
end
parse_images(h={})
click to toggle source
# File lib/static_image_download/parser.rb, line 103
#
# Runs the configured parser (via method_to_value with parse_option) and then
# collect_images, both inside a Timeout::timeout(@timeout) block.
#
# h - passed through to method_to_value.
#
# Returns the result of collect_images, or nil after printing the error when
# a StandardError is raised anywhere in the method body.
def parse_images(h={})
  Timeout::timeout(@timeout) do
    method_to_value(self.parse_option, h)
    collect_images
  end
rescue => error
  p "#{error}"
  nil
end
push_image(src)
click to toggle source
# File lib/static_image_download/parser.rb, line 122
#
# Wraps +src+ in a new Images object (built with @path and
# Images.default_download_option) and appends it to +images+.
#
# Returns the +images+ collection.
def push_image(src)
  image = Images.new(src, @path, Images.default_download_option)
  images.push(image)
end