module ImageScraper
Constants
- VERSION
Public Class Methods
Scrap()
click to toggle source
A method to run the whole scrape: fetch the page at the configured url, then download every image (img element) it references
# File lib/ImageScraper.rb, line 72
# Runs the full scrape, strictly in this order: fetch the page, build the
# local file names, build the image links, then download.
# Prints "Page Found" as soon as the page fetch has returned.
# Returns whatever the final download step returns.
def self.Scrap()
  getPage
  puts "Page Found"
  getImgNames
  getImgLinks
  download
end
download()
click to toggle source
A method to download images
# File lib/ImageScraper.rb, line 44
# Downloads every entry of @imgLinks to the path stored at the same index
# in @files, printing a "found" / "downloaded" line for each one.
#
# Returns the number of links processed.
# Raises whatever +open+ or File.open raise (unreachable link, missing
# destination directory, ...); nothing is rescued here.
def self.download()
  @imgLinks.zip(@files) do |link, path|
    puts "#{link} found"

    # Fetch first, so a failed fetch does not leave an empty file behind.
    # The block form closes the source handle as soon as it has been read.
    # NOTE(review): Kernel#open only fetches URLs when open-uri has patched
    # it (Ruby < 3.0), and it treats a leading "|" as a command to run --
    # confirm the links are trusted, or move to URI.open.
    data = open(link) { |source| source.read }

    # "wb": image data is binary; text mode would translate line endings
    # on Windows and corrupt the file.
    File.open(path, "wb") { |target| target.write(data) }

    puts "#{path} downloaded"
  end

  @imgLinks.length
end
getImgLinks()
click to toggle source
A method to get image links
# File lib/ImageScraper.rb, line 27
# Builds @imgLinks: one download URL per img element of @page, in document
# order.
#
# Side effects: getNewUrl strips the page file name from @url, and a
# trailing "/" is appended to @url when it is missing; the stripped page
# name is printed.
#
# Each src is resolved against @url with URI.join, so absolute
# ("http://..."), protocol-relative ("//host/...") and root-relative
# ("/img/a.png") values give a usable link instead of being glued onto the
# page directory. When URI cannot handle the input (relative base, illegal
# characters) the plain concatenation @url + src is used instead.
#
# Returns the collection of img elements.
# Raises TypeError when an img element has no src attribute.
def self.getImgLinks()
  # Local require so this method does not rely on another library having
  # loaded URI already; require is a no-op once the file is loaded.
  require "uri"

  images = @page.css('img')
  page_name = self.getNewUrl()
  @url += "/" unless @url.end_with?("/")
  print page_name

  @imgLinks = images.map do |image|
    src = image["src"]
    concatenated = @url + src
    begin
      URI.join(@url, src).to_s
    rescue URI::Error
      concatenated
    end
  end

  images
end
getImgNames()
click to toggle source
A method to create the download folder if needed and build the local file name for every image on the page
# File lib/ImageScraper.rb, line 58
# Creates @folder when it does not exist yet (printing a notice), then
# builds @files: one local path per img element of @page, made of @folder,
# "/" and the last "/"-separated segment of the element's src.
#
# Returns the collection of img elements.
# Raises NoMethodError when an img element has no src attribute, and
# TypeError when a src has no segment at all (empty string).
def self.getImgNames()
  # File.exist? rather than File.exists?: the latter was deprecated and
  # then removed in Ruby 3.2.
  unless File.exist?(@folder)
    Dir.mkdir(@folder)
    puts "#{@folder} Directory Created"
  end

  images = @page.css('img')
  @files = images.map do |image|
    @folder + "/" + image["src"].split("/").last
  end

  images
end
getNewUrl()
click to toggle source
# File lib/ImageScraper.rb, line 19
# Strips a trailing page file name (last path segment ending in ".html" or
# ".php") from @url, in place, and prints the stripped name.
#
# Returns the removed text, or "" when @url does not end in such a name
# (in which case @url is left untouched and an empty line is printed).
def self.getNewUrl()
  # The alternation is grouped and the whole pattern anchored at the end of
  # the string. The previous pattern parsed as "(name.html) OR (php$)", so
  # "view.php" only lost "php" and a host such as "myhtml.com" was cut.
  page_name = @url[%r{[^/]+\.(?:html|php)\z}].to_s
  puts page_name

  # Remove by position: the match is always the tail of @url, whereas
  # slice!(string) would remove the first occurrence of that text.
  @url.slice!(@url.length - page_name.length, page_name.length)
end
getPage()
click to toggle source
A function to get HTML of page at a url
# File lib/ImageScraper.rb, line 14
# Reads the document at @url and stores its parsed form in @page.
#
# Returns the parsed document.
# Raises whatever +open+ raises when @url cannot be read.
def self.getPage()
  # Block form so the handle is closed as soon as the body has been read,
  # instead of staying open until garbage collection.
  # NOTE(review): Kernel#open only fetches URLs when open-uri has patched
  # it (Ruby < 3.0) -- confirm the target Ruby version.
  markup = open(@url) { |source| source.read }
  @page = Nokogiri::HTML(markup)
end
initialize(url, folder = "Images")
click to toggle source
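Stores the page url and the download folder for the later calls (a module-level method that must be called explicitly; it is not a constructor that Ruby invokes)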
# File lib/ImageScraper.rb, line 8
# Stores the settings for the scrape.
#
# url    - address of the page to scrape.
# folder - directory the images are saved to (default: "Images").
#
# Returns folder.
def self.initialize(url, folder = "Images")
  # Keep a private copy: @url is later edited in place (slice!), which
  # would otherwise alter the caller's string, or raise when the caller
  # passed a frozen literal.
  @url = url.dup
  @folder = folder
end