class Compactor::Amazon::ReportScraper

Public Class Methods

new(user_credentials={}) click to toggle source
# File lib/compactor/scraper.rb, line 24
# Builds a scraper session: configures a fresh agent, picks a user agent
# string and immediately logs in to Seller Central.
#
# user_credentials - Hash read with the keys :email, :password and
#                    :validate_totals.
#
# Anything raised by login_to_seller_central propagates to the caller.
def initialize(user_credentials={})
  @validate_totals = user_credentials[:validate_totals]

  @mechanize = agent
  @mechanize.max_file_buffer = 4 * 1024 * 1024
  @mechanize.max_history     = 2

  # NOTE(review): VERIFY_NONE turns off TLS certificate verification for
  # every request made by this agent -- confirm this is still intended.
  http = @mechanize.agent.http
  http.verify_mode        = OpenSSL::SSL::VERIFY_NONE
  http.reuse_ssl_sessions = false

  randomize_user_agent!
  login_to_seller_central user_credentials[:email], user_credentials[:password]
end

Private Class Methods

report_type(report_identifier) click to toggle source

TODO: replace this chain of predicate checks with a hash lookup keyed by the report identifier.

# File lib/compactor/scraper.rb, line 222
# Maps a report identifier to a format symbol.
#
# Returns :xml, :tsv or :tsv2 depending on which of xml_report?,
# text_v1_report? or text_v2_report? accepts the identifier (checked in
# that order).
# Raises Compactor::Amazon::UnknownReportType when none of them does.
def self.report_type(report_identifier)
  if xml_report?(report_identifier)
    :xml
  elsif text_v1_report?(report_identifier)
    :tsv
  elsif text_v2_report?(report_identifier)
    :tsv2
  else
    fail Compactor::Amazon::UnknownReportType
  end
end
text_v1_report?(report_identifier) click to toggle source
# File lib/compactor/scraper.rb, line 213
# True when report_identifier equals the string "Download Flat File".
def self.text_v1_report?(report_identifier)
  report_identifier == "Download Flat File"
end
text_v2_report?(report_identifier) click to toggle source
# File lib/compactor/scraper.rb, line 217
# True when report_identifier equals the string "Download Flat File V2".
def self.text_v2_report?(report_identifier)
  report_identifier == "Download Flat File V2"
end
xml_report?(report_identifier) click to toggle source
# File lib/compactor/scraper.rb, line 209
# True when report_identifier equals the string "Download XML".
def self.xml_report?(report_identifier)
  report_identifier == "Download XML"
end

Public Instance Methods

buyer_name() click to toggle source
# File lib/compactor/scraper.rb, line 92
# Reads the buyer name from the page the agent currently has loaded.
#
# Finds the cell labelled "Contact Buyer:", then takes the stripped text of
# the second cell of the same row.
# Returns "" when any step of the lookup raises a StandardError.
def buyer_name
  label_xpath = "//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]"
  label_cell  = @mechanize.page.search!(label_xpath).first
  value_cell  = label_cell.parent.search!("td[2]")
  value_cell.text.strip
rescue
  ""
end
get_balance() click to toggle source
# File lib/compactor/scraper.rb, line 78
# Looks up the deposit amount of the first report row that is not settled.
#
# Opens the past-settlements page with empty date filters first.
# Returns 0.0 when the page has no results or no row is unsettled,
# otherwise that row's deposit_amount.
def get_balance
  go_to_past_settlements('', '')
  return 0.0 if page_has_no_results?

  unsettled = report_rows.find { |row| row.not_settled_report? }
  return 0.0 if unsettled.nil?

  unsettled.deposit_amount
end
get_marketplaces() click to toggle source
# File lib/compactor/scraper.rb, line 51
# Loads MARKETPLACE_HOMEPAGE and lists the marketplaces shown on it.
#
# Returns an Array of [name, marketplace_id] pairs:
# * one pair per <option> when a marketplace <select> is present;
# * a single [name, nil] pair when only a marketplace label is present;
# * [] when the page shows neither.
#
# The empty case matters to callers that retry on a blank result: search
# returns a node set, which is truthy even when it matched nothing, so the
# label branch has to test for emptiness explicitly.
def get_marketplaces
  @mechanize.get MARKETPLACE_HOMEPAGE
  page = @mechanize.page

  marketplace_selector = page.search("#marketplaceSelect, #sc-mkt-switcher-select").first
  if marketplace_selector
    return marketplace_selector.search("option").map do |option|
      [ option.text, option["value"] ]
    end
  end

  marketplace_name = page.search("#market_switch .merch-site-span, #sc-mkt-switcher-form .sc-mkt-switcher-txt")
  return [] if marketplace_name.empty?

  [ [ marketplace_name.text.strip, nil ] ]
end
get_orders(order_ids) click to toggle source
# File lib/compactor/scraper.rb, line 118
# Collects payee details for several orders.
#
# order_ids - enumerable of order ids.
#
# Returns a Hash mapping each order id to whatever payee_details returned
# for it (nil included).
def get_orders(order_ids)
  order_ids.inject({}) do |details_by_id, order_id|
    details_by_id[order_id] = payee_details(order_id)
    details_by_id
  end
end
marketplaces() click to toggle source
# File lib/compactor/scraper.rb, line 36
# Lists the supported marketplaces of the account together with a balance.
#
# Returns an Array of [account_name, marketplace_id, balance] triples; each
# marketplace is selected before its balance is read.
# Raises MissingMarketplaceError when no marketplace could be read from the
# page, and NoMarketplacesError when filter_marketplaces keeps none of them.
def marketplaces
  discovered = wait_for_element { get_marketplaces }
  raise MissingMarketplaceError if discovered.blank?

  supported = filter_marketplaces(discovered)
  raise NoMarketplacesError if supported.empty?

  supported.map do |account_name, marketplace_id|
    select_marketplace(marketplace_id)
    [ account_name, marketplace_id, get_balance ]
  end
end
payee_details(order_id) click to toggle source
# File lib/compactor/scraper.rb, line 108
# Opens the detail page of one order and reads the buyer data from it.
#
# Returns a Hash with the keys "BuyerName" and "ShippingAddress", or nil
# when loading or reading the page raises a StandardError (best effort).
def payee_details(order_id)
  @mechanize.get order_detail_url(order_id)
  {
    "BuyerName"       => buyer_name,
    "ShippingAddress" => shipping_address
  }
rescue
  nil
end
reports(from, to) click to toggle source
# File lib/compactor/scraper.rb, line 86
# Fetches the settlement reports for a date range.
#
# from, to - range bounds; both are passed through parse_dates before the
#            past-settlements page is requested.
#
# Returns whatever get_reports returns.
def reports(from, to)
  start_date, end_date = parse_dates(from, to)
  go_to_past_settlements(start_date, end_date)
  get_reports
end
select_marketplace(marketplace_id) click to toggle source
# File lib/compactor/scraper.rb, line 73
# Requests the Seller Central preference URL for the given marketplace.
#
# marketplace_id - String id; it is CGI-escaped before being placed in the
#                  query string.
#
# Returns the result of the agent's get call.
def select_marketplace(marketplace_id)
  escaped_id = CGI.escape(marketplace_id)
  prefs_url  = "https://sellercentral.amazon.com/gp/utilities/set-rainier-prefs.html?ie=UTF8&&marketplaceID=#{escaped_id}"
  @mechanize.get prefs_url
end
shipping_address() click to toggle source
# File lib/compactor/scraper.rb, line 100
# Reads the shipping address from the page the agent currently has loaded.
#
# Takes the text of every child of the cell that holds the
# "Shipping Address:" heading, drops blank lines and the heading itself,
# and hands the rest to parse_address_lines!.
# Returns that method's result, or "" when anything raises a StandardError.
def shipping_address
  heading_xpath = "//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']"
  address_cell  = @mechanize.page.search!(heading_xpath).first.parent

  texts = address_cell.children.map { |node| node.text }
  lines = texts.reject { |text| text.blank? || text =~ /^Shipping Address/ }
  parse_address_lines!(lines)
rescue
  ""
end

Private Instance Methods

add_to_collection(reports, row) click to toggle source

Find the report to download from a row, and add it to a collection of reports. Do this while ensuring that the current page stays the current page.

# File lib/compactor/scraper.rb, line 259
# Downloads the report of one row and files it under its type.
#
# reports - Hash of report type => Array of reports; mutated in place.
# row     - object responding to download_report!, which is called with
#           @validate_totals and must return [report_type, report].
#
# The download runs inside @mechanize.transact. Returns what transact
# returns.
def add_to_collection(reports, row)
  @mechanize.transact do
    kind, payload = row.download_report!(@validate_totals)
    (reports[kind] ||= []) << payload
  end
end
agent() click to toggle source
# File lib/compactor/scraper.rb, line 128
# Returns a new Mechanize instance; initialize stores it in @mechanize.
def agent
  Mechanize.new
end
bad_login?() click to toggle source
# File lib/compactor/scraper.rb, line 350
# Tells whether the current page shows a login failure.
#
# True when the page has a "#message_error" or ".messageboxerror" element,
# or when the text of its ".tiny" elements contains the "not an authorized
# Seller Central user" message.
def bad_login?
  return true unless @mechanize.page.parser.css("#message_error").blank?
  return true unless @mechanize.page.parser.css(".messageboxerror").blank?

  fine_print = @mechanize.page.parser.css('.tiny').text
  fine_print.include?('Sorry, you are not an authorized Seller Central user')
end
default_number_of_attempts() click to toggle source
# File lib/compactor/scraper.rb, line 241
# Number of attempts wait_for_element makes when the caller passes none.
def default_number_of_attempts
  6
end
filter_marketplaces(marketplaces) click to toggle source
# File lib/compactor/scraper.rb, line 136
# Reduces the scraped marketplaces to the ones this scraper handles.
#
# marketplaces - Array of [name, marketplace_id] pairs.
#
# Returns an Array holding at most two pairs, in this order:
# * ['Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID] when a
#   'www.amazon.com' entry exists whose id is nil or that constant;
# * ['Checkout By Amazon', id] for the first checkout entry with an id.
def filter_marketplaces(marketplaces)
  seller_entry = marketplaces.find do |label, id|
    label == 'www.amazon.com' && ( id.nil? || id == AMAZON_COM_MARKETPLACE_ID )
  end

  checkout_entry = marketplaces.find do |label, id|
    (label == 'Your Checkout Website' || label == "Checkout by Amazon (Production View)") && !id.nil?
  end

  accepted = []
  accepted << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if seller_entry
  accepted << [ 'Checkout By Amazon', checkout_entry[1] ] if checkout_entry
  accepted
end
get_reports() click to toggle source
# File lib/compactor/scraper.rb, line 196
# Gathers the reports of every result page, following "Next" links.
#
# Processes the current page first, then keeps going for as long as
# pages_to_parse returns a truthy value.
# Returns a Hash of report type => flat Array of reports.
def get_reports
  collected = {}

  loop do
    get_reports_in_page.each do |report_type, report_streams|
      (collected[report_type] ||= []) << report_streams
    end
    break unless pages_to_parse
  end

  # Each page contributed one nested array per type; flatten them in place.
  collected.each { |_type, streams| streams.flatten! }
end
get_reports_in_page() click to toggle source
# File lib/compactor/scraper.rb, line 308
# Collects the reports listed on the current page.
#
# Rows that can be downloaded are added right away. Rows that are only
# requestable get their report requested (inside @mechanize.transact) and
# are then handed to get_reports_to_watch.
# Returns a Hash of report type => Array of reports; {} when the page has
# no results.
def get_reports_in_page
  collected = {}
  return collected if page_has_no_results?

  pending = []
  report_rows.each do |row|
    if row.can_download_report?
      add_to_collection(collected, row)
      next
    end
    next unless row.requestable_report?

    @mechanize.transact do
      row.request_report
      pending << row
    end
  end

  get_reports_to_watch(pending, collected)
  collected
end
get_reports_to_watch(reports_to_watch, reports, count=0) click to toggle source
# File lib/compactor/scraper.rb, line 267
# Polls requested reports until they can be downloaded.
#
# reports_to_watch - Array of rows still waiting; rows are removed from it
#                    once downloaded or once reload returns nil.
# reports          - Hash that add_to_collection fills.
# count            - attempt number to start from.
#
# Every round reloads the current page, re-checks each row and then pauses
# via slowdown_like_a_human. Stops when the list is empty or
# timeout_fetching_reports says so. Always returns nil.
def get_reports_to_watch(reports_to_watch, reports, count=0)
  attempt = count

  until reports_to_watch.empty? || timeout_fetching_reports(attempt)
    rescue_empty_results { @mechanize.get @mechanize.page.uri }

    reports_to_watch.reject! do |stale_row|
      fresh_row = stale_row.reload
      if fresh_row.nil?
        true
      elsif fresh_row.can_download_report?
        # The return value of add_to_collection decides the removal.
        add_to_collection(reports, fresh_row)
      end
    end

    slowdown_like_a_human(attempt)
    attempt += 1
  end

  nil
end
go_to_past_settlements(from, to) click to toggle source
# File lib/compactor/scraper.rb, line 187
# Opens the past-settlements page for a date range.
#
# from, to - Strings; both are CGI-escaped into the query string.
#
# Returns the result of the agent's get call.
# Raises ::Compactor::Amazon::NotProAccountError when the request fails with
# a Mechanize::ResponseCodeError whose message contains
# "403 => Net::HTTPForbidden"; any other response code error is re-raised.
def go_to_past_settlements(from, to)
  start_date = CGI.escape(from)
  end_date   = CGI.escape(to)
  settlements_url = "https://sellercentral.amazon.com/gp/payments-account/past-settlements.html?endDate=#{end_date}&startDate=#{start_date}&pageSize=Ten"
  @mechanize.get settlements_url
rescue Mechanize::ResponseCodeError => e
  forbidden = e.message["403 => Net::HTTPForbidden"]
  raise ::Compactor::Amazon::NotProAccountError if forbidden
  raise # everything else goes up unchanged
end
locked_account?() click to toggle source
# File lib/compactor/scraper.rb, line 356
# Tells whether the current page warns that the seller account is limited.
#
# Looks at the text of the ".messageboxalert" elements. Returns the search
# result itself when that is falsy.
def locked_account?
  lock_notice = "limited access to your seller account"
  alerts = @mechanize.page.search(".messageboxalert")
  alerts && alerts.text.include?(lock_notice)
end
login_to_seller_central(email, password) click to toggle source
# File lib/compactor/scraper.rb, line 333
# Logs in through the first form of MARKETPLACE_HOMEPAGE.
#
# The homepage is requested (with the retries wait_for_element provides)
# until its first form has an "email" field; that form is then filled in
# and submitted.
#
# Raises Compactor::Amazon::LoginFormNotFoundError when no such form shows
# up, Compactor::Amazon::AuthenticationError when bad_login? is true after
# submitting, and Compactor::Amazon::LockedAccountError when
# locked_account? is true.
def login_to_seller_central(email, password)
  email_field_exists = wait_for_element do
    @mechanize.get MARKETPLACE_HOMEPAGE
    first_form = @mechanize.page.forms.first
    # A page without any form counts as "not there yet" so that it is
    # retried and reported as LoginFormNotFoundError, instead of failing
    # with a NoMethodError on nil.
    !first_form.nil? && !first_form["email"].nil?
  end
  raise Compactor::Amazon::LoginFormNotFoundError unless email_field_exists

  form = @mechanize.page.forms.first
  form.email    = email
  form.password = password
  form.submit

  raise Compactor::Amazon::AuthenticationError if bad_login?
  raise Compactor::Amazon::LockedAccountError  if locked_account?
end
order_detail_url(order_id) click to toggle source
# File lib/compactor/scraper.rb, line 152
# Builds the Seller Central order-detail URL for an order.
#
# order_id - order identifier; converted with to_s and CGI-escaped so that
#            characters such as "&" or "=" cannot alter the query string.
#            Ids made only of letters, digits and dashes are unchanged.
#
# Returns the URL as a String.
def order_detail_url(order_id)
  "https://sellercentral.amazon.com/gp/orders-v2/details?ie=UTF8&orderID=#{CGI.escape(order_id.to_s)}"
end
page_has_no_results?() click to toggle source
# File lib/compactor/scraper.rb, line 300
# Tells whether the current page reports "No results found".
#
# Raises ReportLoadingTimeout when the page has no ".data-display" element
# at all.
def page_has_no_results?
  results_area = @mechanize.page.search(".data-display")
  if results_area.blank?
    fail ReportLoadingTimeout
  end

  shown_text = results_area.text
  shown_text.include? "No results found"
end
pages_to_parse() click to toggle source
# File lib/compactor/scraper.rb, line 284
# Moves on to the next result page when there is one.
#
# Clicks the first link whose text is "Next" and returns the result of the
# click; returns false when the page has no such link.
def pages_to_parse
  next_link = @mechanize.page.links_with(:text => "Next").first
  next_link.nil? ? false : next_link.click
end
parse_address_lines!(addr_lines) click to toggle source
# File lib/compactor/scraper.rb, line 156
# Turns the text lines of a shipping address block into a Hash.
#
# addr_lines - Array of Strings. The first line is taken to be the buyer's
#              name and skipped; lines starting with "Phone:" are dropped;
#              the last remaining line must look like "City, ST 12345".
#
# Returns a Hash with the keys 'street', 'city', 'state' and 'postalcode'.
# Street lines are joined with a newline character.
# Raises AddressParseFailure when no address line is left (this includes an
# empty addr_lines) or when the last line contains no comma.
def parse_address_lines!(addr_lines)
  nbsp = "\302\240"
  addr_lines = addr_lines.map { |line| line.gsub(nbsp, " ") }
  # Assume the first line is the name of the buyer, so skip it. Slicing an
  # empty array from index 1 yields nil, hence the fallback to [].
  addr_lines = (addr_lines[1..-1] || []).reject { |l| l =~ /^Phone:/ }

  raise AddressParseFailure if addr_lines.empty?

  citystate_line = addr_lines.pop
  city, remainder = citystate_line.split(/,\s*/)

  raise AddressParseFailure if remainder.nil?

  state, postalcode = remainder.split(/\s+/)

  {
    # Double quotes on purpose: '\n' would be a backslash followed by "n".
    'street'     => addr_lines.join("\n"),
    'city'       => city,
    'state'      => state,
    'postalcode' => postalcode
  }
end
parse_dates(from, to) click to toggle source
# File lib/compactor/scraper.rb, line 329
# Converts both range bounds with Date.parse_to_us_format.
#
# Each bound is turned into a String with to_s first.
# Returns a two-element Array: [converted from, converted to].
def parse_dates(from, to)
  [ from, to ].map { |bound| Date.parse_to_us_format(bound.to_s) }
end
randomize_user_agent!() click to toggle source

Pick a random user agent that isn't Mechanize

# File lib/compactor/scraper.rb, line 180
# Assigns a randomly picked key of Mechanize::AGENT_ALIASES, other than
# "Mechanize", to the agent's user_agent.
#
# Uses Array#choice where it exists and Array#sample otherwise.
#
# NOTE(review): the value assigned is the alias key itself, not the user
# agent string stored under that key -- confirm that this is intended.
def randomize_user_agent!
  candidates = Mechanize::AGENT_ALIASES.keys.select { |name| !(name == "Mechanize") }
  picker = candidates.respond_to?(:choice) ? :choice : :sample

  @mechanize.user_agent = candidates.send(picker)
end
report_rows() click to toggle source
# File lib/compactor/scraper.rb, line 291
# Wraps the data rows of the report table in ScrapedRow objects.
#
# Uses the second table matched by "#content-main-entities > table" and
# keeps the <tr> elements whose class attribute is exactly "list-row-even"
# or "list-row-odd".
# Returns an Array of ScrapedRow.
def report_rows
  data_row_classes = ["list-row-even","list-row-odd"]
  report_table = @mechanize.page.search!("#content-main-entities > table")[1]

  data_rows = report_table.search("tr[class]").select do |tr|
    data_row_classes.include? tr["class"]
  end
  data_rows.map { |tr| ScrapedRow.new(tr, @mechanize) }
end
rescue_empty_results() { || ... } click to toggle source
# File lib/compactor/scraper.rb, line 245
# Runs the block up to three times, stopping as soon as the page has
# results.
#
# After each run page_has_no_results? is consulted; the block is run again
# only while it keeps returning true.
def rescue_empty_results(&block)
  3.times do
    yield
    results_present = !page_has_no_results?
    break if results_present
  end
end
slowdown_like_a_human(count) click to toggle source
# File lib/compactor/scraper.rb, line 132
# Sleeps for count squared seconds.
def slowdown_like_a_human(count)
  pause_seconds = count ** 2
  sleep pause_seconds
end
timeout_fetching_reports(count) click to toggle source
# File lib/compactor/scraper.rb, line 252
# True once count is greater than ATTEMPTS_BEFORE_GIVING_UP (a constant
# defined outside this method).
def timeout_fetching_reports(count)
  count > ATTEMPTS_BEFORE_GIVING_UP
end
wait_for_element(attempts=default_number_of_attempts) { || ... } click to toggle source

With the default of 6 attempts the waits add up to 1 + 2 + 4 + 8 + 16 + 32 = 63 seconds, i.e. roughly a minute in the worst case.

# File lib/compactor/scraper.rb, line 231
# Calls the block repeatedly until it yields something that is not blank.
#
# attempts - maximum number of calls (default: default_number_of_attempts).
#
# After every blank result it sleeps 2**attempt seconds (1, 2, 4, ...),
# the last attempt included.
# Returns the first non-blank block result, or nil when every attempt came
# back blank.
def wait_for_element(attempts=default_number_of_attempts, &block)
  attempts.times do |attempt|
    candidate = yield
    if candidate.blank?
      sleep 2**attempt # => 1 sec, 2 secs, 4, 8, 16, 32, etc
    else
      return candidate
    end
  end

  nil # nothing showed up
end