class ImagesLink

Public Class Methods

new(link) click to toggle source

expect valid url

# File lib/images_link.rb, line 9
def initialize(link)
  @link = link

  begin
    @response = HTTParty.get(@link)
  rescue Errno::ECONNREFUSED => e
    puts "not valid url"
    abort e.message
  end

  @doc = Nokogiri::HTML(@response.body)
  @doc.search('//noscript').each { |node| node.remove } # убираю мешающие ноды
  @links = [] # сдесь будут храниться все урлы картинок
  @handler_link = HandlerLink # обработчик урлов
  @link_host_name = @handler_link.get_host_link(@link) # беру имя домена
end

Public Instance Methods

get_images_from_url() click to toggle source

returns all found images url

# File lib/images_link.rb, line 27
def get_images_from_url
  @links = (images_from_img_tag + images_from_link_tag + images_from_extension).uniq
  @handler_link.remove_global_unless_symbols(@links)
  @links
end
images_from_extension() click to toggle source

returns all images url with jpg, png, gif…

# File lib/images_link.rb, line 70
def images_from_extension
  # нахожу все урлы с jpg, png, gif...
  @images_links = URI.extract(@doc.to_s.encode("UTF-16be", :invalid => :replace, :replace => "?").encode('UTF-8')).select { |l| l[/\.(?:gif|png|jpe?g)\b/] }
  @handler_link.handler_links(@images_links, @link) # обрабатываю урлы
  @handler_link.remove_unless_symbols(@images_links)
  @handler_link.remove_unless_link(@images_links)
  @images_links.uniq
end
images_from_img_tag() click to toggle source

returns all images url with tags img

# File lib/images_link.rb, line 34
def images_from_img_tag
  img_array = []
  # пробегаю по тегам img (meta og:images...), хватаю урл и закидываю в @links
  @doc.xpath('//img').each do |img|
    if img['src'] != nil
      array = [img['src'].to_s]
      src = @handler_link.remove_unless_symbols(array)
      got_link = @handler_link.handler_prefix_link(@link_host_name, src.to_s.delete!("[\"]"))

      img_array << got_link
    end
  end

  img_array.uniq!
  @handler_link.remove_unless_link(img_array)
  img_array
end