class Apartmenthunter::Scraper
Attributes
area[RW]
bathrooms[RW]
bedrooms[RW]
form_mechanize[RW]
get_results[RW]
max_price[RW]
miles[RW]
min_price[RW]
page[RW]
result_page[RW]
results[RW]
scrape[RW]
scraper[RW]
set_address[RW]
zip[RW]
Public Class Methods
form_mechanize()
click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 50
# Fills in the search form on the page held in @page and submits it.
#
# The second form on the page (index 1) is used as the search form. Each
# search criterion is copied from its instance variable into the matching
# form field, and the response to the submission is stored in @result_page.
#
# NOTE(review): this is defined with `def self.`, so the instance variables
# it reads belong to the class object, not to a Scraper instance built by
# `new` -- confirm that is intended.
#
# Returns whatever `form.submit` returns (also stored in @result_page).
def self.form_mechanize
  # Use Mechanize to enter search terms into the form fields desired.
  # (Second form on page)
  form = @page.forms[1]

  # Form field name => search criterion, assigned in this order.
  {
    'min_price' => @min_price,
    'max_price' => @max_price,
    'bedrooms' => @bedrooms,
    'bathrooms' => @bathrooms,
    'search_distance' => @miles,
    'postal' => @zip
  }.each { |field, value| form[field] = value }

  @result_page = form.submit
end
get_results()
click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 64
# Extracts one hash per listing row from @result_page, appends them to
# @results, shows a progress bar and prints the collected results.
#
# Each `p.row` element yields a hash with the keys :location, :name, :price
# and :url. The listing link is the second anchor in the row; rows without
# one are skipped rather than raising on nil.
#
# NOTE(review): this is defined with `def self.`, so @result_page and
# @results are read from the class object -- confirm that is intended.
#
# Returns the result of `puts` (nil).
def self.get_results
  @result_page.search('p.row').each do |result|
    link = result.css('a')[1]
    next if link.nil? # no listing link in this row, nothing to record

    # Save results
    @results << {
      # Drops the first 3 and the last 12 characters of the span text;
      # to_s keeps the value a String when the text is too short to slice.
      :location => result.search('span.pnr').text[3..-13].to_s,
      :name => link.text.strip,
      :price => result.search('span.price').text,
      :url => "http://newyork.craigslist.org" + link.attributes["href"].value
    }
  end

  # The bar advances on a fixed timer after the rows have been parsed.
  bar = ProgressBar.create(:title => "Downloading", :total => 20, :length => 40)
  20.times do
    sleep 0.1
    bar.increment
  end

  puts @results
end
new(min_price, max_price, bedrooms, bathrooms, zip, miles)
click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 13
# Builds a scraper for one apartment search.
#
# The search area is not a parameter: it is taken from `Area.set_area`.
# The list of collected results starts out empty.
#
# min_price - lower price bound for the search
# max_price - upper price bound for the search
# bedrooms  - bedroom criterion
# bathrooms - bathroom criterion
# zip       - postal code the search is centred on
# miles     - search distance around the postal code
def initialize(min_price, max_price, bedrooms, bathrooms, zip, miles)
  @area = Area.set_area
  @min_price = min_price
  @max_price = max_price
  @bedrooms = bedrooms
  @bathrooms = bathrooms
  @zip = zip
  @miles = miles
  @results = []
end
scrape()
click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 45
# Fetches the page at @address with the agent in @scraper and stores the
# response in @page.
#
# NOTE(review): this is defined with `def self.`, so @scraper, @address and
# @page belong to the class object -- confirm that is intended.
#
# Returns the fetched page.
def self.scrape
  # Let us scrape.
  @page = @scraper.get(@address)
end
set_address()
click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 38
# Builds the search URL for the area held in @area and stores it in
# @address.
#
# @area is inserted verbatim between ".../search" and "/aap", so it is
# expected to carry its own leading slash (e.g. "/jsy"). A nil @area
# interpolates as an empty string.
#
# NOTE(review): this is defined with `def self.`, so @area and @address
# belong to the class object -- confirm that is intended.
#
# Returns the address string.
def self.set_address
  # Set the address for the area to be searched set by area method in CLI
  @address = "http://newyork.craigslist.org/search#{@area}/aap"
end
Public Instance Methods
mechanize()
click to toggle source
# File lib/apartmenthunter/scraper.rb, line 27
# Creates the Mechanize agent used for scraping, stores it in @scraper and
# then calls `set_address`.
#
# Returns whatever `set_address` returns.
def mechanize
  # Instantiate a new web scraper with Mechanize
  @scraper = Mechanize.new

  # Mechanize setup to rate limit your scraping
  # to prevent IP ban.
  # Kept as a Proc (not a lambda) so it accepts whatever arguments the agent
  # passes when it invokes the callback.
  @scraper.history_added = Proc.new { sleep 0.5 }

  set_address
end
parse_results()
click to toggle source
# File lib/apartmenthunter/scraper.rb, line 63
# Extracts one hash per listing row from @result_page, appends them to
# @results and then hands over to `progress`.
#
# Each `p.row` element yields a hash with the keys :location, :name, :price
# and :url. The listing link is the second anchor in the row; rows without
# one are skipped rather than raising on nil.
#
# Returns whatever `progress` returns.
def parse_results
  @result_page.search('p.row').each do |result|
    link = result.css('a')[1]
    next if link.nil? # no listing link in this row, nothing to record

    # Save results
    @results << {
      # Drops the first 3 and the last 12 characters of the span text;
      # to_s keeps the value a String when the text is too short to slice.
      :location => result.search('span.pnr').text[3..-13].to_s,
      :name => link.text.strip,
      :price => result.search('span.price').text,
      :url => "http://newyork.craigslist.org" + link.attributes["href"].value
    }
  end

  progress
end
progress()
click to toggle source
# File lib/apartmenthunter/scraper.rb, line 82
# Shows a "Downloading" progress bar that advances 20 steps, pausing 0.1
# seconds before each step, then hands back the collected results.
#
# The bar runs on a fixed timer; it does not read from @results.
#
# Returns @results.
def progress
  # Named `bar` so the local does not shadow this method's own name.
  bar = ProgressBar.create(:title => "Downloading", :total => 20, :length => 40)
  20.times do
    sleep 0.1
    bar.increment
  end
  @results
end
run()
click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 24
# Runs the whole search: creates the Mechanize agent, then calls
# `set_address`, `scrape`, `form_mechanize` and `get_results` in that order.
#
# NOTE(review): in this file those four steps are defined with `def self.`
# (class-level), while this method calls them on the instance -- confirm
# the bare calls reach the intended definitions.
#
# Returns @results.
def run
  # Instantiate a new web scraper with Mechanize
  @scraper = Mechanize.new

  # Mechanize setup to rate limit your scraping
  # to prevent IP ban.
  # Kept as a Proc (not a lambda) so it accepts whatever arguments the agent
  # passes when it invokes the callback.
  @scraper.history_added = Proc.new { sleep 0.5 }

  set_address
  scrape
  form_mechanize
  get_results

  @results
end
scrape_craig()
click to toggle source
# File lib/apartmenthunter/scraper.rb, line 45
# Fetches the search page at @address, fills in and submits its search form,
# then hands over to `parse_results`.
#
# The fetched page is stored in @page and the response to the form
# submission in @result_page. The second form on the page (index 1) is used
# as the search form.
#
# Returns whatever `parse_results` returns.
def scrape_craig
  # Let us scrape.
  @page = @scraper.get(@address)

  # Use Mechanize to enter search terms into the form fields desired.
  # (Second form on page)
  form = @page.forms[1]

  # Form field name => search criterion, assigned in this order.
  {
    'min_price' => @min_price,
    'max_price' => @max_price,
    'bedrooms' => @bedrooms,
    'bathrooms' => @bathrooms,
    'search_distance' => @miles,
    'postal' => @zip
  }.each { |field, value| form[field] = value }

  @result_page = form.submit

  parse_results
end