class Apartmenthunter::Scraper

Attributes

area[RW]
bathrooms[RW]
bedrooms[RW]
form_mechanize[RW]
get_results[RW]
max_price[RW]
miles[RW]
min_price[RW]
page[RW]
result_page[RW]
results[RW]
scrape[RW]
scraper[RW]
set_address[RW]
zip[RW]

Public Class Methods

form_mechanize() click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 50
# Fills in and submits the search form found on @page.
#
# The form used is the second one on the page (index 1). The stored search
# criteria (@min_price, @max_price, @bedrooms, @bathrooms, @miles, @zip) are
# written into the correspondingly named form fields, in that order.
#
# Returns the value of form.submit, which is also kept in @result_page.
def self.form_mechanize
  search_form = @page.forms[1]

  # Field name => value; Ruby hashes iterate in insertion order, so the
  # fields are assigned in the order written here.
  {
    'min_price'       => @min_price,
    'max_price'       => @max_price,
    'bedrooms'        => @bedrooms,
    'bathrooms'       => @bathrooms,
    'search_distance' => @miles,
    'postal'          => @zip
  }.each { |field, value| search_form[field] = value }

  @result_page = search_form.submit
end
get_results() click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 64
# Extracts apartment listings from @result_page and appends them to @results.
#
# Every 'p.row' node on the result page becomes one hash with the keys
# :location, :name, :price and :url:
#   :name     - stripped text of the row's second anchor
#   :url      - "http://newyork.craigslist.org" + that anchor's href value
#   :price    - text of the row's 'span.price'
#   :location - characters 3..-13 of the row's 'span.pnr' text, or "" when
#               that text is too short for the slice to exist
#
# Rows that have no second anchor are skipped, since neither a name nor a URL
# can be read from them.
#
# After collecting, a 20-step "Downloading" progress bar is displayed and the
# collected listings are printed with puts.
#
# Returns @results.
def self.get_results
  @result_page.search('p.row').each do |row|
    link = row.css('a')[1]
    # No second anchor means no title/URL to record for this row.
    next if link.nil?

    @results << {
      # String#[] returns nil when the range starts past the end of the
      # string; fall back to "" so :location is always a String.
      :location => row.search('span.pnr').text[3..-13] || "",
      :name     => link.text.strip,
      :price    => row.search('span.price').text,
      :url      => "http://newyork.craigslist.org" + link.attributes["href"].value
    }
  end

  bar = ProgressBar.create(:title => "Downloading", :total => 20, :length => 40)
  20.times do
    sleep 0.1
    bar.increment
  end

  puts @results
  @results
end
new(min_price, max_price, bedrooms, bathrooms, zip, miles) click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 13
# Builds a scraper for one set of search criteria.
#
# min_price, max_price - price bounds stored for the search form
# bedrooms, bathrooms  - room counts stored for the search form
# zip                  - value stored for the form's postal field
# miles                - value stored for the form's search distance field
#
# The search area is obtained from Area.set_area, and the result list starts
# out empty.
def initialize(min_price, max_price, bedrooms, bathrooms, zip, miles)
  @area = Area.set_area

  @min_price, @max_price = min_price, max_price
  @bedrooms, @bathrooms = bedrooms, bathrooms
  @zip, @miles = zip, miles

  @results = []
end
scrape() click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 45
# Fetches the page at @address through the agent held in @scraper.
#
# Returns the fetched page, which is also stored in @page.
def self.scrape
  fetched_page = @scraper.get(@address)
  @page = fetched_page
end
set_address() click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 38
# Builds the search URL for the area held in @area (e.g. "/jsy") and stores
# it in @address.
#
# The area is interpolated as-is between the ".../search" prefix and the
# "/aap" suffix, so a nil @area contributes nothing to the URL.
#
# Returns the resulting URL string.
def self.set_address
  @address = "http://newyork.craigslist.org/search#{@area}/aap"
end

Public Instance Methods

mechanize() click to toggle source
# File lib/apartmenthunter/scraper.rb, line 27
# Creates the Mechanize agent used for scraping and then sets the search
# address.
#
# The agent is stored in @scraper. Its history_added hook sleeps for half a
# second, which rate-limits requests to reduce the risk of an IP ban.
#
# Returns whatever set_address returns.
def mechanize
  agent = Mechanize.new
  # Plain proc (not a lambda) so the hook ignores any arguments it is
  # called with.
  agent.history_added = proc { sleep 0.5 }
  @scraper = agent

  set_address
end
parse_results() click to toggle source
# File lib/apartmenthunter/scraper.rb, line 63
# Extracts apartment listings from @result_page, appends them to @results and
# then hands over to #progress.
#
# Every 'p.row' node on the result page becomes one hash with the keys
# :location, :name, :price and :url:
#   :name     - stripped text of the row's second anchor
#   :url      - "http://newyork.craigslist.org" + that anchor's href value
#   :price    - text of the row's 'span.price'
#   :location - characters 3..-13 of the row's 'span.pnr' text, or "" when
#               that text is too short for the slice to exist
#
# Rows that have no second anchor are skipped, since neither a name nor a URL
# can be read from them.
#
# Returns the value returned by #progress.
def parse_results
  @result_page.search('p.row').each do |row|
    link = row.css('a')[1]
    # No second anchor means no title/URL to record for this row.
    next unless link

    listing = { :location => "", :name => "", :price => "", :url => "" }
    listing[:name] = link.text.strip
    listing[:url] = "http://newyork.craigslist.org" + link.attributes["href"].value
    listing[:price] = row.search('span.price').text

    # String#[] returns nil when the range starts past the end of the string;
    # keep the "" default in that case so :location is always a String.
    location = row.search('span.pnr').text[3..-13]
    listing[:location] = location if location

    @results << listing
  end

  progress
end
progress() click to toggle source
# File lib/apartmenthunter/scraper.rb, line 82
# Displays a 20-step "Downloading" progress bar, pausing 0.1 seconds before
# each step, and then hands back the collected listings.
#
# Returns @results.
def progress
  bar = ProgressBar.create(title: "Downloading", total: 20, length: 40)

  20.times do
    sleep 0.1
    bar.increment
  end

  @results
end
run() click to toggle source
# File lib/apartmenthunter/scraper-inprog.rb, line 24
# Runs the whole scrape: creates the Mechanize agent, then calls set_address,
# scrape, form_mechanize and get_results in that order.
#
# The agent is stored in @scraper. Its history_added hook sleeps for half a
# second, which rate-limits requests to reduce the risk of an IP ban.
#
# NOTE(review): set_address, scrape, form_mechanize and get_results are
# listed both as attributes and as class methods of this class - confirm
# which of those the bare calls below resolve to on an instance.
#
# Returns @results.
def run
  agent = Mechanize.new
  # Plain proc (not a lambda) so the hook ignores any arguments it is
  # called with.
  agent.history_added = proc { sleep 0.5 }
  @scraper = agent

  set_address
  scrape
  form_mechanize
  get_results

  @results
end
scrape_craig() click to toggle source
# File lib/apartmenthunter/scraper.rb, line 45
# Fetches the search page, submits the search form with the stored criteria
# and parses the response.
#
# Steps:
#   1. Fetch @address with the agent in @scraper; keep the page in @page.
#   2. Take the second form on that page (index 1) and write @min_price,
#      @max_price, @bedrooms, @bathrooms, @miles and @zip into its fields.
#   3. Submit the form; keep the response in @result_page.
#   4. Call parse_results.
#
# Returns whatever parse_results returns.
def scrape_craig
  @page = @scraper.get(@address)
  search_form = @page.forms[1]

  # Field name => value; Ruby hashes iterate in insertion order, so the
  # fields are assigned in the order written here.
  criteria = {
    'min_price'       => @min_price,
    'max_price'       => @max_price,
    'bedrooms'        => @bedrooms,
    'bathrooms'       => @bathrooms,
    'search_distance' => @miles,
    'postal'          => @zip
  }
  criteria.each { |field, value| search_form[field] = value }

  @result_page = search_form.submit
  parse_results
end