class Compactor::Amazon::ReportScraper
Public Class Methods
new(user_credentials={})
click to toggle source
# File lib/compactor/scraper.rb, line 24
#
# Creates the Mechanize agent, configures it, picks a random user agent and
# logs in to Seller Central.
#
# user_credentials - Hash read for :email, :password and :validate_totals.
def initialize(user_credentials={})
  @mechanize = agent
  @mechanize.max_file_buffer = 4 * 1024 * 1024
  @mechanize.max_history     = 2

  # NOTE(review): VERIFY_NONE disables TLS certificate verification for every
  # request made by this agent, including the login request below.
  http = @mechanize.agent.http
  http.verify_mode        = OpenSSL::SSL::VERIFY_NONE
  http.reuse_ssl_sessions = false

  # Kept so that add_to_collection can hand it to ScrapedRow#download_report!.
  @validate_totals = user_credentials[:validate_totals]

  randomize_user_agent!
  login_to_seller_central(user_credentials[:email], user_credentials[:password])
end
Private Class Methods
report_type(report_identifier)
click to toggle source
TODO: Make this into a hash lookup instead of a chain of predicate checks.
# File lib/compactor/scraper.rb, line 222
#
# Maps the label of a download button to a report type symbol.
#
# report_identifier - the button label, compared by the *_report? predicates.
#
# Returns :xml, :tsv or :tsv2.
# Raises Compactor::Amazon::UnknownReportType when no predicate matches.
def self.report_type(report_identifier)
  if xml_report?(report_identifier)
    :xml
  elsif text_v1_report?(report_identifier)
    :tsv
  elsif text_v2_report?(report_identifier)
    :tsv2
  else
    fail Compactor::Amazon::UnknownReportType
  end
end
text_v1_report?(report_identifier)
click to toggle source
# File lib/compactor/scraper.rb, line 213
#
# True when the identifier is exactly the label "Download Flat File".
def self.text_v1_report?(report_identifier)
  report_identifier == "Download Flat File"
end
text_v2_report?(report_identifier)
click to toggle source
# File lib/compactor/scraper.rb, line 217
#
# True when the identifier is exactly the label "Download Flat File V2".
def self.text_v2_report?(report_identifier)
  report_identifier == "Download Flat File V2"
end
xml_report?(report_identifier)
click to toggle source
# File lib/compactor/scraper.rb, line 209
#
# True when the identifier is exactly the label "Download XML".
def self.xml_report?(report_identifier)
  report_identifier == "Download XML"
end
Public Instance Methods
buyer_name()
click to toggle source
# File lib/compactor/scraper.rb, line 92
#
# Reads the buyer name from the page currently loaded in the agent: finds the
# "Contact Buyer:" label cell, then takes the stripped text of "td[2]" in the
# same row.
#
# Best effort: any StandardError while scraping results in "".
def buyer_name
  label_cell = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field'][text()=\"Contact Buyer:\"]").first
  value_cell = label_cell.parent.search!("td[2]")
  value_cell.text.strip
rescue
  ""
end
get_balance()
click to toggle source
# File lib/compactor/scraper.rb, line 78
#
# Loads the past-settlements page with empty date bounds and returns the
# deposit amount of the first row that reports itself as not settled.
#
# Returns 0.0 when the page has no results or no such row exists.
def get_balance
  go_to_past_settlements('', '')
  return 0.0 if page_has_no_results?

  unsettled_row = report_rows.find(&:not_settled_report?)
  return 0.0 if unsettled_row.nil?

  unsettled_row.deposit_amount
end
get_marketplaces()
click to toggle source
# File lib/compactor/scraper.rb, line 51
#
# Loads MARKETPLACE_HOMEPAGE and lists the marketplaces shown on it.
#
# Returns an Array of [name, marketplace_id] pairs:
# * one pair per <option> when a marketplace selector is present;
# * a single [name, nil] pair when only a marketplace name element is found;
# * [] when neither is found.
def get_marketplaces
  @mechanize.get MARKETPLACE_HOMEPAGE
  page = @mechanize.page

  marketplace_selector = page.search("#marketplaceSelect, #sc-mkt-switcher-select").first
  if marketplace_selector
    return marketplace_selector.search("option").map do |option|
      [ option.text, option["value"] ]
    end
  end

  marketplace_name = page.search("#market_switch .merch-site-span, #sc-mkt-switcher-form .sc-mkt-switcher-txt")
  # The search result is a collection and therefore truthy even when nothing
  # matched; test for emptiness so that "not found" really yields [] instead
  # of [["", nil]].
  return [] if marketplace_name.nil? || marketplace_name.empty?

  [ [ marketplace_name.text.strip, nil ] ]
end
get_orders(order_ids)
click to toggle source
# File lib/compactor/scraper.rb, line 118
#
# Looks up payee details for each order id.
#
# order_ids - collection of order ids.
#
# Returns a Hash keyed by order id; each value is whatever payee_details
# returned for that id.
def get_orders(order_ids)
  order_ids.each_with_object({}) do |order_id, details_by_id|
    details_by_id[order_id] = payee_details(order_id)
  end
end
marketplaces()
click to toggle source
# File lib/compactor/scraper.rb, line 36
#
# Discovers the marketplaces for the logged-in account and fetches a balance
# for each one that survives filter_marketplaces.
#
# Returns an Array of [account_name, marketplace_id, balance] triples.
# Raises MissingMarketplaceError when discovery yields nothing, and
# NoMarketplacesError when filtering leaves nothing.
def marketplaces
  discovered = wait_for_element { get_marketplaces }
  raise MissingMarketplaceError if discovered.blank?

  supported = filter_marketplaces(discovered)
  raise NoMarketplacesError if supported.empty?

  supported.map do |account_name, marketplace_id|
    # The marketplace has to be selected before its balance is read.
    select_marketplace(marketplace_id)
    [ account_name, marketplace_id, get_balance ]
  end
end
payee_details(order_id)
click to toggle source
# File lib/compactor/scraper.rb, line 108
#
# Opens the order detail page for order_id and scrapes buyer information.
#
# Returns a Hash with "BuyerName" and "ShippingAddress" keys, or nil when any
# StandardError is raised along the way (best effort).
def payee_details(order_id)
  @mechanize.get order_detail_url(order_id)
  {
    "BuyerName"       => buyer_name,
    "ShippingAddress" => shipping_address
  }
rescue
  nil
end
reports(from, to)
click to toggle source
# File lib/compactor/scraper.rb, line 86
#
# Collects the reports between two dates.
#
# from, to - date bounds; normalised by parse_dates before use.
#
# Returns whatever get_reports returns after navigating to the settlements
# page for that range.
def reports(from, to)
  start_date, end_date = parse_dates(from, to)
  go_to_past_settlements(start_date, end_date)
  get_reports
end
select_marketplace(marketplace_id)
click to toggle source
# File lib/compactor/scraper.rb, line 73
#
# Requests the Seller Central preferences URL that switches the session to
# the given marketplace.
#
# marketplace_id - String id; CGI-escaped before being put in the query.
#
# Returns the result of the agent's get call.
def select_marketplace(marketplace_id)
  escaped_id = CGI.escape(marketplace_id)
  @mechanize.get "https://sellercentral.amazon.com/gp/utilities/set-rainier-prefs.html?ie=UTF8&&marketplaceID=#{escaped_id}"
end
shipping_address()
click to toggle source
# File lib/compactor/scraper.rb, line 100
#
# Reads the shipping address from the page currently loaded in the agent.
# Takes the parent of the "Shipping Address:" label, collects the text of
# its children (skipping blank entries and the label itself) and hands the
# lines to parse_address_lines!.
#
# Best effort: any StandardError while scraping or parsing results in "".
def shipping_address
  label = @mechanize.page.search!("//tr[@class='list-row']/td[@class='data-display-field']/strong[text()='Shipping Address:']").first
  texts = label.parent.children.map(&:text)
  addr_lines = texts.reject { |text| text.blank? || text =~ /^Shipping Address/ }
  parse_address_lines!(addr_lines)
rescue
  ""
end
Private Instance Methods
add_to_collection(reports, row)
click to toggle source
Find the report to download from a row, and add it to a collection of reports. Do this while ensuring that the current page stays the current page.
# File lib/compactor/scraper.rb, line 259
#
# Downloads the report for a row and appends it to reports under its type.
# The download runs inside @mechanize.transact (see the description above).
#
# reports - Hash of report type => Array of reports; mutated in place.
# row     - object responding to download_report!(validate_totals), which
#           returns a [report_type, report] pair.
#
# Returns the value of the transact block, i.e. the Array that was appended
# to. get_reports_to_watch relies on that value being truthy.
def add_to_collection(reports, row)
  @mechanize.transact do
    report_type, report = row.download_report!(@validate_totals)
    (reports[report_type] ||= []) << report
  end
end
agent()
click to toggle source
# File lib/compactor/scraper.rb, line 128
#
# Factory for the HTTP agent used by the scraper.
#
# Returns a new Mechanize instance.
def agent
  Mechanize.new
end
bad_login?()
click to toggle source
# File lib/compactor/scraper.rb, line 350
#
# Decides whether the current page indicates a failed login.
#
# Returns true when the page has a "#message_error" or ".messageboxerror"
# element, or when the ".tiny" text contains the "not an authorized Seller
# Central user" message; false otherwise.
def bad_login?
  document = @mechanize.page.parser

  return true unless document.css("#message_error").blank?
  return true unless document.css(".messageboxerror").blank?

  document.css('.tiny').text.include?('Sorry, you are not an authorized Seller Central user')
end
default_number_of_attempts()
click to toggle source
# File lib/compactor/scraper.rb, line 241
#
# Default number of tries used by wait_for_element when none is given.
def default_number_of_attempts
  6
end
filter_marketplaces(marketplaces)
click to toggle source
# File lib/compactor/scraper.rb, line 136
#
# Reduces the scraped marketplace list to the two supported kinds and gives
# them canonical names.
#
# marketplaces - Array of [name, marketplace_id] pairs.
#
# Returns an Array with at most two pairs, in this order:
# * ['Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID] when a
#   'www.amazon.com' entry exists whose id is nil or AMAZON_COM_MARKETPLACE_ID;
# * ['Checkout By Amazon', id] for the first checkout entry with a non-nil id.
def filter_marketplaces(marketplaces)
  filtered = []

  seller_account = marketplaces.find do |label, id|
    label == 'www.amazon.com' && ( id.nil? || id == AMAZON_COM_MARKETPLACE_ID )
  end
  filtered << [ 'Amazon Seller Account', AMAZON_COM_MARKETPLACE_ID ] if seller_account

  checkout = marketplaces.find do |label, id|
    (label == 'Your Checkout Website' || label == "Checkout by Amazon (Production View)") && !id.nil?
  end
  filtered << [ 'Checkout By Amazon', checkout[1] ] if checkout

  filtered
end
get_reports()
click to toggle source
# File lib/compactor/scraper.rb, line 196
#
# Collects reports from the current page and from every following page.
#
# The current page is always processed once; further pages are processed for
# as long as pages_to_parse returns a truthy value.
#
# Returns a Hash of report type => flat Array of reports.
def get_reports
  reports = {}

  loop do
    get_reports_in_page.each do |report_type, report_streams|
      (reports[report_type] ||= []) << report_streams
    end
    break unless pages_to_parse
  end

  # Each page contributed one nested Array per type; flatten them in place.
  reports.each { |_type, streams| streams.flatten! }
end
get_reports_in_page()
click to toggle source
# File lib/compactor/scraper.rb, line 308
#
# Collects the reports available from the rows of the current page.
#
# Rows that can be downloaded are added straight away. Rows that are only
# requestable get their report requested and are then polled through
# get_reports_to_watch.
#
# Returns a Hash of report type => Array of reports ({} when the page has no
# results).
def get_reports_in_page
  reports = {}
  return reports if page_has_no_results?

  pending_rows = []
  report_rows.each do |row|
    if row.can_download_report?
      add_to_collection(reports, row)
    elsif row.requestable_report?
      @mechanize.transact do
        row.request_report
        pending_rows << row
      end
    end
  end

  get_reports_to_watch(pending_rows, reports)
  reports
end
get_reports_to_watch(reports_to_watch, reports, count=0)
click to toggle source
# File lib/compactor/scraper.rb, line 267
#
# Polls the current page until every requested report has been collected or
# timeout_fetching_reports says to stop.
#
# reports_to_watch - Array of rows whose report was requested; rows are
#                    removed from it in place once handled.
# reports          - Hash of report type => Array of reports; mutated through
#                    add_to_collection.
# count            - attempt number to start from; used for the timeout check
#                    and the back-off delay.
#
# Returns nil.
def get_reports_to_watch(reports_to_watch, reports, count=0)
  attempt = count

  until reports_to_watch.empty? || timeout_fetching_reports(attempt)
    # Re-fetch the current page so the rows can be re-read.
    rescue_empty_results { @mechanize.get @mechanize.page.uri }

    reports_to_watch.reject! do |stale_row|
      fresh_row = stale_row.reload
      if fresh_row.nil?
        true
      elsif fresh_row.can_download_report?
        # Truthy return value of add_to_collection removes the row.
        add_to_collection(reports, fresh_row)
      end
      # Otherwise the block yields nil and the row stays on the watch list.
    end

    slowdown_like_a_human(attempt)
    attempt += 1
  end

  nil
end
go_to_past_settlements(from, to)
click to toggle source
# File lib/compactor/scraper.rb, line 187
#
# Navigates the agent to the past-settlements page for a date range.
#
# from, to - String date bounds; CGI-escaped before being put in the query.
#
# Returns the result of the agent's get call.
# Raises Compactor::Amazon::NotProAccountError when the request fails with a
# Mechanize::ResponseCodeError whose message mentions
# "403 => Net::HTTPForbidden"; any other ResponseCodeError is re-raised as is.
def go_to_past_settlements(from, to)
  start_date = CGI.escape(from)
  end_date   = CGI.escape(to)
  @mechanize.get "https://sellercentral.amazon.com/gp/payments-account/past-settlements.html?endDate=#{end_date}&startDate=#{start_date}&pageSize=Ten"
rescue Mechanize::ResponseCodeError => error
  forbidden = error.message["403 => Net::HTTPForbidden"]
  raise ::Compactor::Amazon::NotProAccountError if forbidden
  raise # any other error just re-raise it as is
end
locked_account?()
click to toggle source
# File lib/compactor/scraper.rb, line 356
#
# Checks the current page for the "limited access" alert.
#
# Returns true when the text of the ".messageboxalert" element(s) contains
# "limited access to your seller account", false when it does not. If the
# search itself returns a falsy value, that value is returned unchanged.
def locked_account?
  alerts = @mechanize.page.search(".messageboxalert")
  alerts && alerts.text.include?("limited access to your seller account")
end
login_to_seller_central(email, password)
click to toggle source
# File lib/compactor/scraper.rb, line 333
#
# Loads MARKETPLACE_HOMEPAGE, waits for a login form, fills in the
# credentials and submits it.
#
# email, password - credentials assigned to the first form of the page.
#
# Raises Compactor::Amazon::LoginFormNotFoundError when no first form with an
# "email" field shows up, Compactor::Amazon::AuthenticationError when
# bad_login? is true after submitting, and
# Compactor::Amazon::LockedAccountError when locked_account? is true.
def login_to_seller_central(email, password)
  email_field_exists = wait_for_element do
    @mechanize.get MARKETPLACE_HOMEPAGE
    first_form = @mechanize.page.forms.first
    # A page without any form is handled like a form without an e-mail field,
    # so the retry / LoginFormNotFoundError path is taken instead of a
    # NoMethodError on nil.
    !first_form.nil? && !first_form["email"].nil?
  end
  raise Compactor::Amazon::LoginFormNotFoundError unless email_field_exists

  form = @mechanize.page.forms.first
  form.email = email
  form.password = password
  form.submit

  raise Compactor::Amazon::AuthenticationError if bad_login?
  raise Compactor::Amazon::LockedAccountError if locked_account?
end
order_detail_url(order_id)
click to toggle source
# File lib/compactor/scraper.rb, line 152
#
# Builds the Seller Central order-detail URL for an order.
#
# order_id - the order id; converted to a String and CGI-escaped so that it
#            cannot alter the query string, as the other URL builders in this
#            class already do for their parameters.
#
# Returns the URL String.
def order_detail_url(order_id)
  escaped_id = CGI.escape(order_id.to_s)
  "https://sellercentral.amazon.com/gp/orders-v2/details?ie=UTF8&orderID=#{escaped_id}"
end
page_has_no_results?()
click to toggle source
# File lib/compactor/scraper.rb, line 300
#
# Tells whether the current page reports an empty result list.
#
# Returns true when the ".data-display" text contains "No results found".
# Raises ReportLoadingTimeout when the page has no ".data-display" element.
def page_has_no_results?
  display_area = @mechanize.page.search(".data-display")
  fail ReportLoadingTimeout if display_area.blank?

  display_area.text.include? "No results found"
end
pages_to_parse()
click to toggle source
# File lib/compactor/scraper.rb, line 284
#
# Moves to the next page of results when there is one.
#
# Returns false when the current page has no link with the text "Next";
# otherwise clicks the first such link and returns the result of the click.
def pages_to_parse
  next_link = @mechanize.page.links_with(:text => "Next").first
  next_link.nil? ? false : next_link.click
end
parse_address_lines!(addr_lines)
click to toggle source
# File lib/compactor/scraper.rb, line 156
#
# Turns the text lines of a shipping-address cell into an address Hash.
#
# addr_lines - Array of Strings. The first line is assumed to be the buyer
#              name and is skipped; lines starting with "Phone:" are dropped;
#              the last remaining line must look like "City, ST 12345".
#
# Returns a Hash with 'street' (remaining lines joined by a newline), 'city',
# 'state' and 'postalcode'.
# Raises AddressParseFailure when nothing is left after skipping the name and
# phone lines, or when the last line has no comma-separated remainder.
def parse_address_lines!(addr_lines)
  nbsp = "\302\240"
  cleaned = addr_lines.map { |line| line.gsub(nbsp, " ") }

  # Assume the first line is the name of the buyer, so skip it. drop(1) gives
  # [] for empty input (where [1..-1] would give nil), so an empty list ends
  # in AddressParseFailure rather than NoMethodError.
  street_lines = cleaned.drop(1).reject { |l| l =~ /^Phone:/ }
  raise AddressParseFailure if street_lines.empty?

  citystate_line = street_lines.pop
  city, remainder = citystate_line.split(/,\s*/)
  raise AddressParseFailure if remainder.nil?
  state, postalcode = remainder.split(/\s+/)

  {
    # Double quotes: join with a real newline, not the two characters "\" "n".
    'street'     => street_lines.join("\n"),
    'city'       => city,
    'state'      => state,
    'postalcode' => postalcode
  }
end
parse_dates(from, to)
click to toggle source
# File lib/compactor/scraper.rb, line 329
#
# Normalises both date bounds through Date.parse_to_us_format.
#
# from, to - values converted with to_s before being parsed.
#
# Returns a two-element Array: [converted from, converted to].
def parse_dates(from, to)
  [ from, to ].map { |date| Date.parse_to_us_format(date.to_s) }
end
randomize_user_agent!()
click to toggle source
Pick a random user agent that isn't Mechanize
# File lib/compactor/scraper.rb, line 180
#
# Makes the agent identify itself as a randomly chosen browser.
#
# The candidates are the keys of Mechanize::AGENT_ALIASES except "Mechanize".
# Those keys are alias names, so they are applied with user_agent_alias=;
# user_agent= would send the alias name itself as the User-Agent header.
def randomize_user_agent!
  aliases = Mechanize::AGENT_ALIASES.keys.reject { |name| name == "Mechanize" }
  # Array#choice only exists on old Rubies; Array#sample replaced it.
  chosen = aliases.respond_to?(:choice) ? aliases.choice : aliases.sample
  @mechanize.user_agent_alias = chosen
end
report_rows()
click to toggle source
# File lib/compactor/scraper.rb, line 291
#
# Wraps the data rows of the current page in ScrapedRow objects.
#
# Uses the second table matched by "#content-main-entities > table" and keeps
# only the <tr> elements whose class attribute is exactly "list-row-even" or
# "list-row-odd".
#
# Returns an Array of ScrapedRow.
def report_rows
  data_row_classes = ["list-row-even","list-row-odd"]

  tables = @mechanize.page.search!("#content-main-entities > table")
  data_rows = tables[1].search("tr[class]").select do |tr|
    data_row_classes.include? tr["class"]
  end

  data_rows.map { |tr| ScrapedRow.new(tr, @mechanize) }
end
rescue_empty_results() { || ... }
click to toggle source
# File lib/compactor/scraper.rb, line 245
#
# Runs the block up to three times, stopping as soon as the page no longer
# reports "no results" afterwards.
#
# Returns nil when it stopped early, 3 when all three runs were used.
def rescue_empty_results(&block)
  3.times do
    yield
    has_results = !page_has_no_results?
    break if has_results
  end
end
slowdown_like_a_human(count)
click to toggle source
# File lib/compactor/scraper.rb, line 132
#
# Pauses for count squared seconds (quadratic back-off between polls).
#
# count - attempt number; 0 means no pause.
def slowdown_like_a_human(count)
  pause_seconds = count ** 2
  sleep pause_seconds
end
timeout_fetching_reports(count)
click to toggle source
# File lib/compactor/scraper.rb, line 252
#
# True once the attempt counter has gone past ATTEMPTS_BEFORE_GIVING_UP.
#
# count - number of polling attempts made so far.
def timeout_fetching_reports(count)
  count > ATTEMPTS_BEFORE_GIVING_UP
end
wait_for_element(attempts=default_number_of_attempts) { || ... }
click to toggle source
6 attempts make it wait at most a minute, or close enough to it
# File lib/compactor/scraper.rb, line 231
#
# Calls the block repeatedly until it returns something that is not blank.
#
# attempts - maximum number of times the block is called.
#
# Between failed attempts it sleeps 1, 2, 4, 8, ... seconds. There is no
# sleep after the last attempt, so 6 attempts wait at most 31 seconds.
#
# Returns the first non-blank block result, or nil when every attempt was
# blank.
def wait_for_element(attempts=default_number_of_attempts, &block)
  last_attempt = attempts - 1

  attempts.times do |attempt|
    element = yield
    return element unless element.blank?

    # Backing off after the final try would only delay the nil result.
    sleep 2**attempt unless attempt == last_attempt
  end

  nil # no element found
end