class BookmeterExporter::Crawler
Public Class Methods
new(email, password)
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 11 def initialize(email, password) @email = email @password = password @root_url = URI("https://bookmeter.com/") @user_id = nil end
Public Instance Methods
crawl()
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 18 def crawl start_webdriver login book_urls = collect_book_urls fetch_books(book_urls) end
Private Instance Methods
collect_book_urls()
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 57 def collect_book_urls go_read_books last_page_url = @driver.find_element(:css, "ul.bm-pagination").find_element(:link_text, "最後").attribute("href") last_page = CGI.parse(URI.parse(last_page_url).query)["page"].shift.to_i urls = [] (1..last_page).each do |page| page_url = @root_url.merge("/users/#{@user_id}/books/read?page=#{page}") @driver.get page_url book_list_css_selector = ".book-list--grid ul.book-list__group" @wait.until { @driver.current_url == page_url.to_s } @driver.find_elements(:css, book_list_css_selector).each do |ul| ul.find_elements(:css, "li .detail__title").each do |li| urls << li.find_element(:tag_name, "a").attribute("href") end end end puts "Book count: #{urls.count}" urls end
fetch_book(url)
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 87 def fetch_book(url) @driver.get url @wait.until do %r{/books/[0-9]+$}.match(@driver.current_url) end book_asin = @driver.find_element(:css, ".sidebar__group .group__image a").attribute("href") .gsub(%r{https://www.amazon.co.jp/dp/product/(.+)/.*}, '\1') read_date = @driver.find_element(:css, ".read-book__date").text review_text = @driver.find_element(:css, ".read-book__content").text Book.new(book_asin, read_date, review_text) end
fetch_books(book_urls)
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 45 def fetch_books(book_urls) books = Books.new fetched_books_count = 0 book_urls.each do |url| books << fetch_book(url) fetched_books_count += 1 puts "#{fetched_books_count} books fetched..." if (fetched_books_count % 10).zero? end puts "All books fetched." books end
go_read_books()
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 81 def go_read_books read_books_url = @root_url.merge("/users/#{@user_id}/books/read") @driver.get read_books_url @wait.until { sleep 3 } end
login()
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 33 def login @driver.get @root_url.merge("/login") @driver.find_element(:id, "session_email_address").send_keys @email @driver.find_element(:id, "session_password").send_keys @password @driver.find_element(:css, "form[action='/login'] button[type=submit]").click @wait.until { @driver.current_url == @root_url.merge("/home").to_s } puts "Login success." @user_id = @driver.find_element(:css, ".personal-account__data__link").attribute("href")[%r{/([0-9]+)$}, 1] end
start_webdriver()
click to toggle source
# File lib/bookmeter_exporter/crawler.rb, line 27 def start_webdriver puts "Starting Chrome..." @driver = Selenium::WebDriver.for :chrome @wait = Selenium::WebDriver::Wait.new(timeout: 10) end