class AmazonWishList
Constants
- REVEAL_OPTIONS
- SORT_OPTIONS
Attributes
id[RW]
wishes[RW]
Public Class Methods
check_for_redirect(url)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 92
# Resolves +url+ by following HTTP 3xx redirects by hand and returns the first
# URL that does not answer with a redirect.
#
# url           - the URL to start from.
# max_redirects - upper bound on the number of redirects followed (default 10),
#                 so a redirect loop cannot retry forever.
#
# Returns the final URL. Re-raises the RestClient::ExceptionWithResponse when
# the failure is not a redirect, when the redirect carries no Location header,
# or when the redirect limit has been reached.
def self.check_for_redirect(url, max_redirects: 10)
  redirects_followed = 0
  begin
    # max_redirects: 0 makes RestClient raise on a redirect instead of following it.
    RestClient::Request.execute(method: :get, url: url, max_redirects: 0)
  rescue RestClient::ExceptionWithResponse => err
    response = err.response
    # The exception may carry no response at all; treat that as a non-redirect failure.
    location = response.headers[:location] if response && response.code / 100 == 3
    raise unless location && redirects_followed < max_redirects

    redirects_followed += 1
    url = location
    retry
  end
  url
end
find_lek_from_response(response)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 76
# Extracts the "lastEvaluatedKey" value from a wishlist page response.
#
# As of the time of writing this, "lastEvaluatedKey", abbreviated as "lek",
# is used to keep track of what portions of the wishlist have already been
# loaded, and is sent in the query string of ajax calls to get the next page.
#
# response - an object whose #body is the page HTML as a String.
#
# Returns the text between the opening and closing markers of the first
# lastEvaluatedKey field found in the body.
def self.find_lek_from_response(response)
  opening_marker = 'name="lastEvaluatedKey" value="'
  closing_marker = '" class="lastEvaluatedKey"'

  # Everything after the first opening marker starts with the key itself.
  after_opening = response.body.split(opening_marker)[1]
  after_opening.split(closing_marker).first
end
get_all_wishlist_pages(url_without_qstring, query_params)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 54
# Fetches wishlist pages one after another until the last page is reached.
#
# url_without_qstring - the wishlist URL without any query string.
# query_params        - Hash of query parameters for the first page; it is
#                       copied, so the caller's hash is never modified.
#
# Returns an Array of the responses, in the order they were fetched.
def self.get_all_wishlist_pages(url_without_qstring, query_params)
  # Work on a copy so that adding :lek does not leak into the caller's hash.
  page_params = query_params.dup
  responses = []
  loop do
    response = self.get_wishlist_page(url_without_qstring, page_params)
    responses << response
    # As of the time this was written, this phrase appears only on the last page.
    return responses if response.body.include?("Find a gift")

    # Only :lek changes between pages; the rest of the params stay the same.
    page_params[:lek] = self.find_lek_from_response(response)
  end
end
get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
click to toggle source
# NOTE(review): this variant is shown as the method's description and differs
# from the implementation listed after it; presumably an earlier version that
# parses wishes straight from the fetched pages — confirm.
#
# Validates +reveal+ and +sort+, fetches every page of the wishlist and wraps
# the wishes parsed from those pages in an AmazonWishList.
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)

  sort_string = SORT_OPTIONS[sort]
  raise "invalid sort" unless sort_string

  # lek is nil for the first page
  first_page_params = { reveal: reveal.to_s, sort_string: sort_string }
  base_url = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"
  responses = self.get_all_wishlist_pages(base_url, first_page_params)
  AmazonWishList.new(amazon_list_id, AmazonWish.parse_wishes_from_pages(responses))
end
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 49
# Builds an AmazonWishList for the given list id.
#
# The arguments are passed unchanged to wishlist_attributes; the attributes it
# returns are turned into wishes by AmazonWish.wishes_from_attributes.
#
# Returns a new AmazonWishList holding +amazon_list_id+ and those wishes.
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  attributes = wishlist_attributes(amazon_list_id, reveal, sort, tld)
  AmazonWishList.new(amazon_list_id, AmazonWish.wishes_from_attributes(attributes))
end
get_wishlist_page(url_without_qstring, query_params)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 66
# Fetches a single wishlist page.
#
# url_without_qstring - the wishlist URL without any query string.
# query_params        - Hash handed to page_query_string to build the query.
#
# Returns whatever RestClient.get returns for the combined URL.
def self.get_wishlist_page(url_without_qstring, query_params)
  RestClient.get(url_without_qstring + self.page_query_string(query_params))
end
new(id, wishes)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 10
# Stores the list id and its wishes on the new instance.
#
# id     - value kept in @id.
# wishes - value kept in @wishes.
def initialize(id, wishes)
  @id, @wishes = id, wishes
end
page_query_string(query_params)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 71
# Builds the query string for a wishlist page request.
#
# query_params - Hash with :reveal and :sort_string, plus an optional :lek.
#
# Returns a String starting with "?". The lek, type and ajax parameters are
# appended only when :lek is present, i.e. for every page after the first.
def self.page_query_string(query_params)
  # A stray ")" used to follow the sort value here and corrupted the parameter.
  first_page_query = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
  lek = query_params[:lek]
  return first_page_query unless lek

  "#{first_page_query}&lek=#{lek}&type=wishlist&ajax=true"
end
wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
click to toggle source
The method below sends only one request, so it is less likely to be identified as being used by a scraper.
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 16
# Fetches every page of a wishlist and returns the attributes extracted from
# the responses by AmazonWish.attributes_from_responses.
#
# amazon_list_id - the list id placed in the wishlist URL.
# reveal         - must be included in REVEAL_OPTIONS.
# sort           - must be a key of SORT_OPTIONS with a truthy value.
# tld            - top-level domain interpolated into the host name.
#
# Raises RuntimeError ("invalid reveal" / "invalid sort") on a bad option.
def self.wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)

  sort_string = SORT_OPTIONS[sort]
  raise "invalid sort" unless sort_string

  # lek is nil for the first page
  first_page_params = { reveal: reveal.to_s, sort_string: sort_string }
  base_url = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"
  responses = self.get_all_wishlist_pages(base_url, first_page_params)
  AmazonWish.attributes_from_responses(responses)
end
wishlist_from_url(url)
click to toggle source
# File lib/amazon_wish_miner/amazon_wish_list.rb, line 84
# Loads the wishlist that a wishlist URL points to.
#
# url - a URL whose path (after the request has been made) contains
#       "/wishlist/" followed by the list id, optionally preceded by "ls".
#
# Returns the result of get_wishlist for the extracted id.
def self.wishlist_from_url(url)
  # last_uri is presumably the URI finally requested after any redirects — confirm.
  final_uri = HTTParty.get(url).request.last_uri
  #url = check_for_redirect(url)
  # Only the path is inspected, so a query string or fragment on the final URL
  # can no longer end up inside the id.
  id_start = final_uri.path.split('/wishlist/')[1]
  id = id_start.split('/').find { |str| str != 'ls' }
  get_wishlist(id)
end