class StaticImageDownloader::Parser

Constants

PARSER_OPTIONS

Attributes

content[RW]
images[RW]
parse_option[RW]
url[RW]
user_agent[RW]

Public Class Methods

default_parse_option() click to toggle source
# File lib/static_image_download/parser.rb, line 36
# Returns the value held in the @@DEFAULTPARSEOPTION class variable,
# the same fallback #initialize applies when parse_option is nil.
def default_parse_option
  @@DEFAULTPARSEOPTION
end
default_path() click to toggle source
# File lib/static_image_download/parser.rb, line 44
# Returns the value held in the @@DEFAULTPATH class variable,
# the same fallback #initialize applies when path is nil.
def default_path
  @@DEFAULTPATH
end
default_timeout() click to toggle source
# File lib/static_image_download/parser.rb, line 48
# Returns the value held in the @@DEFAULTTIMEOUT class variable,
# the same fallback #initialize applies when timeout is nil.
def default_timeout
  @@DEFAULTTIMEOUT
end
default_user_agent() click to toggle source
# File lib/static_image_download/parser.rb, line 40
# Returns the value held in the @@DEFAULTUSERAGENT class variable,
# the same fallback #initialize applies when user_agent is nil.
def default_user_agent
  @@DEFAULTUSERAGENT
end
new(url=@@DEFAULTSITE, path=@@DEFAULTPATH, parse_option=@@DEFAULTPARSEOPTION, timeout=@@DEFAULTTIMEOUT, user_agent=@@DEFAULTUSERAGENT, h={}) click to toggle source
# File lib/static_image_download/parser.rb, line 21
# Sets up a parser for a single page.
#
# Any positional argument may be given as nil; the matching class-level
# default is then used instead.
#
# url::          address of the page; opened by get_url
# path::         kept in @path and handed to Images.new by push_image
# parse_option:: key looked up in PARSER_OPTIONS to pick the parsing method
# timeout::      limit passed to Timeout.timeout by parse_images
# user_agent::   sent as the User-Agent header by get_url
# h::            accepted but not used by this method
def initialize(url=@@DEFAULTSITE, path=@@DEFAULTPATH, parse_option=@@DEFAULTPARSEOPTION, timeout=@@DEFAULTTIMEOUT, user_agent=@@DEFAULTUSERAGENT, h={})
  @url             = url.nil? ? @@DEFAULTSITE : url
  @user_agent      = user_agent.nil? ? @@DEFAULTUSERAGENT : user_agent
  @path            = path.nil? ? @@DEFAULTPATH : path
  @timeout         = timeout.nil? ? @@DEFAULTTIMEOUT : timeout
  @parse_option    = parse_option.nil? ? @@DEFAULTPARSEOPTION : parse_option
  @images          = []
  @extracted_links = []
  # Absolute http/https/ftp URI (IPv4 or host name, optional credentials and
  # port) whose path ends in one of the extensions listed in Images::IMAGE_EXT.
  @rgxp_img_uri    = Regexp.new(/^(http|https|ftp)\:\/\/([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)?((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.[a-zA-Z]{2,4})(\:[0-9]+)?(\/[^\/][a-zA-Z0-9\.\,\?\'\\\/\+&%\$#\=~_\-@]*)\.(#{Images::IMAGE_EXT.join('|')})/i)
  # Disabled variant that also accepted "www" and protocol-relative ("//") prefixes:
  #@rgxp_img_uri               = Regexp.new(/^(((http|https|ftp)\:\/\/)|www|(\/\/))([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)?((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.[a-zA-Z]{2,4})(\:[0-9]+)?(\/[^\/][a-zA-Z0-9\.\,\?\'\\\/\+&%\$#\=~_\-@]*)\.(#{Images::IMAGE_EXT.join('|')})/i)
  # Parse the resolved @url, not the raw argument: a nil url must fall back
  # to the default site here as well instead of raising inside URI.parse.
  @domain          = URI.parse(@url).host
  @content         = nil
end

Public Instance Methods

collect_images() click to toggle source
# File lib/static_image_download/parser.rb, line 116
# Hands every entry of @extracted_links to push_image, in order.
# Returns @extracted_links (the receiver of +each+).
def collect_images
  @extracted_links.each { |src| self.push_image(src) }
end
get_content_raw() click to toggle source
# File lib/static_image_download/parser.rb, line 69
# Downloads the page through get_url and stores its body in @content with
# every run of newline, carriage-return and tab characters collapsed to a
# single space.
#
# Returns the normalised @content string. The previous implementation
# returned nil whenever the body contained no such characters, because
# String#gsub! yields nil when nothing is replaced.
def get_content_raw
  page = get_url
  begin
    body = page.read
  ensure
    # Release the handle (and any temp file behind it) once the body is read.
    page.close if page.respond_to?(:close)
  end
  @content = body.gsub(/[\n\r\t]+/, ' ')
  #p @content if $debug_option
  @content
end
get_url() click to toggle source
# File lib/static_image_download/parser.rb, line 75
# Opens the page at +url+, sending +user_agent+ as the User-Agent header,
# and returns the handle produced by open-uri.
#
# URI.open is used when it exists: since Ruby 3.0 a bare Kernel#open no
# longer opens URLs, and on older Rubies Kernel#open would run a shell
# command for input beginning with "|". The bare call is kept only as a
# fallback for Rubies whose open-uri predates URI.open.
def get_url
  headers = { 'User-Agent' => self.user_agent }
  return URI.open(self.url, headers) if URI.respond_to?(:open)

  open(self.url, headers)
end
img_parse_hpricot(h={}) click to toggle source
# File lib/static_image_download/parser.rb, line 84
# Parses @content with Hpricot and passes every node matching "//img"
# to get_extracted_links, returning that call's result.
# The +h+ argument is accepted but not used.
def img_parse_hpricot(h={})
  img_nodes = Hpricot(@content).search("//img")
  get_extracted_links(img_nodes)
end
img_parse_nokogiri(h={}) click to toggle source
# File lib/static_image_download/parser.rb, line 79
# Parses @content with Nokogiri::HTML and passes every node matching
# "//img" to get_extracted_links, returning that call's result.
# The +h+ argument is accepted but not used.
def img_parse_nokogiri(h={})
  img_nodes = Nokogiri::HTML(@content).search("//img")
  get_extracted_links(img_nodes)
end
img_parse_uri_extract(h={}) click to toggle source
# File lib/static_image_download/parser.rb, line 89
# Pulls every URI out of @content with URI.extract, keeps those that match
# @rgxp_img_uri, and passes the survivors to get_extracted_links.
# The +h+ argument is accepted but not used.
def img_parse_uri_extract(h={})
  candidates = URI.extract(@content)
  # Build the interpolated pattern once rather than once per candidate.
  image_pattern = /#{@rgxp_img_uri}/
  image_links = candidates.select { |candidate| candidate[image_pattern] }
  get_extracted_links(image_links)
end
method_to_value(option, h={}) click to toggle source
# File lib/static_image_download/parser.rb, line 57
# Resolves +option+ to a method name via option_to_method and invokes that
# method with +h+.
#
# Returns the method's result, or "" when the result is nil or false.
# If the call raises a StandardError, the error is printed and nil is
# returned. The resolved method name is printed first when $debug_option
# is set.
def method_to_value(option, h={})
  handler = option_to_method(option)
  p "method= #{handler}" if $debug_option
  begin
    send(handler, h) || ""
  rescue => failure
    p "method_to_value.error = #{failure}"
    nil
  end
end
option_to_method(option) click to toggle source
# File lib/static_image_download/parser.rb, line 53
# Looks +option+ up in PARSER_OPTIONS and returns the stored entry.
def option_to_method(option)
  PARSER_OPTIONS[option]
end
parse_images(h={}) click to toggle source
# File lib/static_image_download/parser.rb, line 103
# Runs the configured parsing method and then collect_images, both inside
# a Timeout block limited by @timeout.
#
# Returns the result of collect_images. If a StandardError is raised
# inside the block, the error is printed and nil is returned.
def parse_images(h={})
  Timeout.timeout(@timeout) do
    method_to_value(self.parse_option, h)
    collect_images
  end
rescue => failure
  p "#{failure}"
  nil
end
push_image(src) click to toggle source
# File lib/static_image_download/parser.rb, line 122
# Wraps +src+ in a new Images object (built with @path and
# Images.default_download_option) and appends it to +images+.
# Returns the +images+ array.
def push_image(src)
  self.images.push(
    Images.new(src, @path, Images.default_download_option)
  )
end