class BookmeterScraper::Scraper

Constants

BOOK_ATTRIBUTES
Book
JP_ATTRIBUTE_NAMES
NUM_BOOKS_PER_PAGE
NUM_USERS_PER_PAGE
PROFILE_ATTRIBUTES
Profile
USER_ATTRIBUTES
User

Attributes

agent[RW]

Public Class Methods

new(agent = nil) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 74
# Sets up a scraper around an optional agent.
#
# @param agent [Object, nil] stored in @agent; several methods of this class
#   use @agent as the default value of their own +agent+ parameter
def initialize(agent = nil)
  # Cache of already fetched book pages; get_book_page fills it, keyed by
  # book URI.
  @book_pages = {}
  @agent = agent
end

Public Instance Methods

extract_books(page) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 151
# Builds a Book for every populated slot of one scraped book-list page.
#
# Slots "book_1_link" .. "book_<NUM_BOOKS_PER_PAGE>_link" are visited in
# order and the walk stops at the first slot whose link is empty.
#
# @param page [#[]] scraped page exposing "book_<i>_link" entries
# @return [Array<Book>] books in slot order
# @raise [ArgumentError] if +page+ is nil
def extract_books(page)
  raise ArgumentError if page.nil?

  (1..NUM_BOOKS_PER_PAGE).each_with_object([]) do |slot, collected|
    link = page["book_#{slot}_link"]
    break collected if link.empty?

    # The first read date is optional; an empty result contributes nothing.
    first_read = scrape_read_date(link)
    dates =
      if first_read.empty?
        []
      else
        [Time.local(first_read['year'], first_read['month'], first_read['day'])]
      end

    # Re-read dates follow the first read date in the resulting list.
    [scrape_reread_date(link)].flatten.each do |reread|
      dates << Time.local(reread['reread_year'], reread['reread_month'], reread['reread_day'])
    end

    title  = scrape_book_name(link)
    author = scrape_book_author(link)
    image  = scrape_book_image_uri(link)
    collected << Book.new(title, author, dates, ROOT_URI + link, image)
  end
end
extract_users(page) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 371
# Turns one scraped user-listing page into User objects.
#
# Slots "user_1_*" .. "user_<NUM_USERS_PER_PAGE>_*" are read in order; the
# walk ends at the first slot whose name is empty.
#
# @param page [#[]] scraped page exposing "user_<i>_name" and "user_<i>_link"
# @return [Array<User>] users built from the populated slots
# @raise [ArgumentError] if +page+ is nil
def extract_users(page)
  raise ArgumentError if page.nil?

  found = []
  (1..NUM_USERS_PER_PAGE).each do |slot|
    name = page["user_#{slot}_name"]
    break if name.empty?

    # The user id is the run of digits after "/u/" at the end of the link.
    id = page["user_#{slot}_link"].match(/\/u\/(\d+)$/)[1]
    found.push(User.new(name, id, ROOT_URI + "/u/#{id}"))
  end
  found
end
fetch_books(user_id, uri_method, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 105
# Collects every book found on a user's book-list pages.
#
# @param user_id [String] must match USER_ID_REGEX
# @param uri_method [Symbol] name of a BookmeterScraper module method that
#   builds the list URI for a user id
# @param agent [Object] agent used for the login check and for scraping the
#   list pages; defaults to @agent
# @return [Books, Array] a Books collection, or a plain empty Array when the
#   agent is not logged in
# @raise [ArgumentError] if +user_id+ or +uri_method+ is not acceptable
# @raise [ScraperError] if +agent+ is nil
def fetch_books(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  books = Books.new
  # Scrape the list pages with the agent that was just validated instead of
  # silently falling back to @agent.
  # NOTE(review): extract_books still resolves individual book pages through
  # get_book_page's default agent (@agent).
  scrape_books_pages(user_id, uri_method, agent).each do |page|
    # extract_books returns a flat list, so append it in place rather than
    # nesting it and re-flattening the whole collection for every page.
    books.concat(extract_books(page))
  end
  books
end
fetch_followers(user_id, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 318
# Lists the users found on a user's followers page.
#
# +agent+ is only consulted for the nil and login checks here; the scraping
# itself is delegated to scrape_followers_page, which takes no agent.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] defaults to @agent
# @return [Array] flattened results of extract_users for each scraped page;
#   empty when the agent is not logged in
# @raise [ArgumentError] if +user_id+ does not match USER_ID_REGEX
# @raise [ScraperError] if +agent+ is nil
def fetch_followers(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  per_page = scrape_followers_page(user_id).map { |page| extract_users(page) }
  per_page.flatten
end
fetch_followings(user_id, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 303
# Lists the users that +user_id+ follows.
#
# The logged-in user's own page and other users' pages are scraped by
# different helpers, chosen by comparing +user_id+ with
# agent.log_in_user_id.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] defaults to @agent
# @return [Array] flattened results of extract_users for each scraped page;
#   empty when the agent is not logged in
# @raise [ArgumentError] if +user_id+ does not match USER_ID_REGEX
# @raise [ScraperError] if +agent+ is nil
def fetch_followings(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  pages =
    if user_id == agent.log_in_user_id
      scrape_followings_page(user_id)
    else
      scrape_others_followings_page(user_id)
    end
  pages.map { |page| extract_users(page) }.flatten
end
fetch_profile(user_id, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 79
# Builds a Profile from the values scraped off a user's page.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] passed on to scrape_profile; defaults to @agent
# @return [Profile] constructed from the splatted scrape_profile result
# @raise [ArgumentError] if +user_id+ does not match USER_ID_REGEX
# @raise [ScraperError] if +agent+ is nil
def fetch_profile(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?

  profile_values = scrape_profile(user_id, agent)
  Profile.new(*profile_values)
end
fetch_read_books(user_id, target_year_month) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 189
# Collects the books a user read in one particular year/month.
#
# Every read-books page is bracketed by the year/month of its first slot and
# of its last populated slot, and +target_year_month+ is compared against
# that bracket.
# NOTE(review): the comparisons presume pages are ordered newest first
# (first slot >= last slot) - confirm against the site.
#
# @param user_id [String] must match USER_ID_REGEX
# @param target_year_month [Time] compared with Time.local(year, month) values
# @return [Books] matches gathered through fetch_target_books
# @raise [ArgumentError] if +user_id+ is invalid or +target_year_month+ is nil
def fetch_read_books(user_id, target_year_month)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError if target_year_month.nil?

  matched = Books.new
  scrape_books_pages(user_id, :read_books_uri).each do |page|
    newest = scrape_read_date(page['book_1_link'])
    oldest = get_last_book_date(page)

    newest_month = Time.local(newest['year'].to_i, newest['month'].to_i)
    oldest_month = Time.local(oldest['year'].to_i, oldest['month'].to_i)

    # Target lies before everything on this page: look at the next page.
    next if target_year_month < oldest_month
    # Target lies after everything on this page: nothing further can match.
    break if target_year_month > newest_month

    matched.concat(fetch_target_books(target_year_month, page))

    # Only when the page ends exactly in the target month can the next page
    # still contain matches; otherwise stop here.
    break if target_year_month > oldest_month
  end
  matched
end
fetch_target_books(target_year_month, page) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 228
# Picks the books on one scraped page that were read, or re-read, in the
# given year/month.
#
# Unlike extract_books, empty slots are skipped rather than ending the walk.
# A book whose first read date is empty is judged on its re-read dates only,
# matching the guard extract_books applies, instead of raising from
# Time.local on missing values.
#
# @param target_year_month [Time] compared with Time.local(year, month) of
#   every read and re-read date
# @param page [#[]] scraped page exposing "book_<i>_link" entries
# @return [Books] matching books; each carries all of its read dates
# @raise [ArgumentError] if either argument is nil
def fetch_target_books(target_year_month, page)
  raise ArgumentError if target_year_month.nil?
  raise ArgumentError if page.nil?

  target_books = Books.new
  1.upto(NUM_BOOKS_PER_PAGE) do |i|
    book_path = page["book_#{i}_link"]
    next if book_path.empty?

    read_dates       = []
    read_year_months = []

    # The first read date may be missing; only use it when present.
    read_date = scrape_read_date(book_path)
    unless read_date.empty?
      read_dates       << Time.local(read_date['year'], read_date['month'], read_date['day'])
      read_year_months << Time.local(read_date['year'], read_date['month'])
    end

    [scrape_reread_date(book_path)].flatten.each do |date|
      read_dates       << Time.local(date['reread_year'], date['reread_month'], date['reread_day'])
      read_year_months << Time.local(date['reread_year'], date['reread_month'])
    end

    next unless read_year_months.include?(target_year_month)

    book_name      = scrape_book_name(book_path)
    book_author    = scrape_book_author(book_path)
    book_image_uri = scrape_book_image_uri(book_path)
    target_books << Book.new(book_name, book_author, read_dates, ROOT_URI + book_path, book_image_uri)
  end

  target_books
end
get_book_page(book_uri, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 268
# Returns the page for a book, fetching it at most once per URI.
#
# A page already held in @book_pages is returned as is; otherwise it is
# fetched with agent.get(ROOT_URI + book_uri) and remembered.
#
# @param book_uri [String] book path appended to ROOT_URI
# @param agent [#get] defaults to @agent
# @return [Object] whatever agent.get returned for this book
def get_book_page(book_uri, agent = @agent)
  @book_pages[book_uri] ||= agent.get(ROOT_URI + book_uri)
end
get_last_book_date(page) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 218
# Finds the read date of the last populated book slot on a scraped page.
#
# Slots are checked from "book_<NUM_BOOKS_PER_PAGE>_link" down to
# "book_1_link"; the first non-empty link found is handed to
# scrape_read_date.
#
# @param page [#[]] scraped page exposing "book_<i>_link" entries
# @return [Object] the scrape_read_date result for that link, or an empty
#   Hash when every slot is empty
# @raise [ArgumentError] if +page+ is nil
def get_last_book_date(page)
  raise ArgumentError if page.nil?

  NUM_BOOKS_PER_PAGE.downto(1) do |i|
    link = page["book_#{i}_link"]
    next if link.empty?
    return scrape_read_date(link)
  end

  # Without this, the Integer returned by #downto would leak out as the
  # "date" when no slot is populated.
  {}
end
scrape_book_author(book_uri) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 277
# Reads the author text from a book page.
#
# @param book_uri [String] book path, resolved through get_book_page
# @return [String] text of the nodes matched by '#author_name'
def scrape_book_author(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('#author_name').text
end
scrape_book_image_uri(book_uri) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 281
# Reads the cover image address from a book page.
#
# @param book_uri [String] book path, resolved through get_book_page
# @return [String] text of the src attribute of the element with id
#   "book_image"
def scrape_book_image_uri(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('//*[@id="book_image"]/@src').text
end
scrape_book_name(book_uri) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 273
# Reads the title text from a book page.
#
# @param book_uri [String] book path, resolved through get_book_page
# @return [String] text of the nodes matched by '#title'
def scrape_book_name(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('#title').text
end
scrape_books_pages(user_id, uri_method, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 120
# Scrapes the name and link of every book slot on a user's book list.
#
# @param user_id [String] must match USER_ID_REGEX
# @param uri_method [Symbol] name of a BookmeterScraper module method that
#   builds the list URI for a user id
# @param agent [Object] used for the login check, the page fetch and the
#   Yasuri injection; defaults to @agent
# @return [Array, Object] [] when not logged in or when the page shows no
#   books; a one-element Array holding the injected struct when the page has
#   no 'span.now_page'; otherwise whatever the pages_root injection returns
#   (presumably one entry per page - see the Yasuri documentation)
# @raise [ArgumentError] if +user_id+ or +uri_method+ is not acceptable
# @raise [ScraperError] if +agent+ is nil
def scrape_books_pages(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  list_uri   = BookmeterScraper.method(uri_method).call(user_id)
  books_page = agent.get(list_uri)

  # if books are not found at all
  return [] if books_page.search('#main_left > div > center > a').empty?

  if books_page.search('span.now_page').empty?
    # No pager on the page: describe the book slots of this single page.
    single_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
      (1..NUM_BOOKS_PER_PAGE).each do |slot|
        # Slot i is addressed as div[i + 1] under #main_left/div.
        title_anchor = "//*[@id=\"main_left\"]/div/div[#{slot + 1}]/div[2]/a"
        send("text_book_#{slot}_name", title_anchor)
        send("text_book_#{slot}_link", "#{title_anchor}/@href")
      end
    end
    [single_root.inject(agent, books_page)]
  else
    # Pager present: the root path selects the link in the span that follows
    # the current-page marker.
    paged_root = Yasuri.pages_root '//span[@class="now_page"]/following-sibling::span[1]/a' do
      text_page_index '//span[@class="now_page"]/a'
      (1..NUM_BOOKS_PER_PAGE).each do |slot|
        title_anchor = "//*[@id=\"main_left\"]/div/div[#{slot + 1}]/div[2]/a"
        send("text_book_#{slot}_name", title_anchor)
        send("text_book_#{slot}_link", "#{title_anchor}/@href")
      end
    end
    paged_root.inject(agent, books_page)
  end
end
scrape_followers_page(user_id) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 351
# Scrapes a user's followers listing.
#
# Thin wrapper that asks scrape_users_listing_page for the :followers_uri
# listing.
#
# @param user_id [String] must match USER_ID_REGEX
# @return [Object] whatever scrape_users_listing_page returns
# @raise [ArgumentError] if +user_id+ does not match USER_ID_REGEX
def scrape_followers_page(user_id)
  raise ArgumentError unless user_id =~ USER_ID_REGEX

  listing = :followers_uri
  scrape_users_listing_page(user_id, listing)
end
scrape_followings_page(user_id, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 332
# Scrapes the followings listing at BookmeterScraper.followings_uri.
#
# fetch_followings calls this when +user_id+ is the logged-in user's own id.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] used for the login check, the page fetch and the
#   Yasuri injection; defaults to @agent
# @return [Array] [] when not logged in, otherwise a one-element Array
#   holding the injected struct with "user_<i>_name" / "user_<i>_link" fields
# @raise [ArgumentError] if +user_id+ does not match USER_ID_REGEX
# @raise [ScraperError] if +agent+ is nil
def scrape_followings_page(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  # Fail with ScraperError like the other scraping methods do, rather than
  # with a NoMethodError from calling logged_in? on nil.
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  followings_page = agent.get(BookmeterScraper.followings_uri(user_id))
  # NOTE(review): the node is declared as struct_books although it describes
  # users; kept as is because the effect of the node name is not visible
  # here - confirm before renaming.
  followings_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
    1.upto(NUM_USERS_PER_PAGE) do |i|
      send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@title")
      send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@href")
    end
  end
  [followings_root.inject(agent, followings_page)]
end
scrape_others_followings_page(user_id) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 346
# Scrapes the followings listing through the generic user-listing scraper.
#
# fetch_followings calls this when +user_id+ is not the logged-in user's id.
# Thin wrapper that asks scrape_users_listing_page for the :followings_uri
# listing.
#
# @param user_id [String] must match USER_ID_REGEX
# @return [Object] whatever scrape_users_listing_page returns
# @raise [ArgumentError] if +user_id+ does not match USER_ID_REGEX
def scrape_others_followings_page(user_id)
  raise ArgumentError unless user_id =~ USER_ID_REGEX

  listing = :followings_uri
  scrape_users_listing_page(user_id, listing)
end
scrape_profile(user_id, agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 86
# Scrapes the profile values shown on a user's page.
#
# Each <dl> in the profile box contributes one label/value pair, taken from
# its first and second child. The result follows the order of
# PROFILE_ATTRIBUTES, looking every attribute up by the label that
# JP_ATTRIBUTE_NAMES maps it to; the first entry is then replaced by the
# text of the page's <h3> heading.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [#get] agent used to fetch the page
# @return [Array] one value per PROFILE_ATTRIBUTES entry (nil when the page
#   has no matching label)
# @raise [ArgumentError] if +user_id+ does not match USER_ID_REGEX
# @raise [ScraperError] if +agent+ is nil
def scrape_profile(user_id, agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?

  mypage = agent.get(BookmeterScraper.mypage_uri(user_id))

  # label text => value text, one entry per <dl>
  labelled_values = {}
  mypage.search('#side_left > div.inner > div.profile > dl').each do |dl_tag|
    label_node = dl_tag.children[0]
    value_node = dl_tag.children[1]
    labelled_values[label_node.children.text] = value_node.children.text
  end

  values = PROFILE_ATTRIBUTES.map do |attribute|
    labelled_values[JP_ATTRIBUTE_NAMES[attribute]]
  end
  values[0] = mypage.at_css('#side_left > div.inner > h3').text
  values
end
scrape_read_date(book_uri, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 285
# Scrapes the read date (year, month, day) from a book page.
#
# Each field is declared as a Yasuri text node on the first <option> of the
# matching select box, with +truncate: /\d+/+ and +proc: :to_i+ -
# presumably keeping the digits and converting them to an Integer (see the
# Yasuri documentation).
#
# @param book_uri [String] book path; the page itself comes from
#   get_book_page, which is called without +agent+
# @param agent [Object] handed to the Yasuri injection; defaults to @agent
# @return [Object] result of the injection; callers read 'year', 'month'
#   and 'day' from it and test it with #empty?
def scrape_read_date(book_uri, agent = @agent)
  date_node = Yasuri.struct_date('//*[@id="book_edit_area"]/form[1]/div[2]') do
    text_year('//*[@id="read_date_y"]/option[1]', truncate: /\d+/, proc: :to_i)
    text_month('//*[@id="read_date_m"]/option[1]', truncate: /\d+/, proc: :to_i)
    text_day('//*[@id="read_date_d"]/option[1]', truncate: /\d+/, proc: :to_i)
  end
  book_page = get_book_page(book_uri)
  date_node.inject(agent, book_page)
end
scrape_reread_date(book_uri, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 294
# Scrapes the re-read date fields from a book page.
#
# The three fields are declared as Yasuri text nodes on the first <option>
# of the first, second and third select box inside a "reread_box" div, with
# +truncate: /\d+/+ and +proc: :to_i+ - presumably keeping the digits and
# converting them to an Integer (see the Yasuri documentation).
#
# @param book_uri [String] book path; the page itself comes from
#   get_book_page, which is called without +agent+
# @param agent [Object] handed to the Yasuri injection; defaults to @agent
# @return [Object] result of the injection; callers flatten it and read
#   'reread_year', 'reread_month' and 'reread_day' from each entry
def scrape_reread_date(book_uri, agent = @agent)
  reread_node = Yasuri.struct_reread_date('//*[@id="book_edit_area"]/div/form[1]/div[2]') do
    text_reread_year('//div[@class="reread_box"]/form[1]/div[2]/select[1]/option[1]', truncate: /\d+/, proc: :to_i)
    text_reread_month('//div[@class="reread_box"]/form[1]/div[2]/select[2]/option[1]', truncate: /\d+/, proc: :to_i)
    text_reread_day('//div[@class="reread_box"]/form[1]/div[2]/select[3]/option[1]', truncate: /\d+/, proc: :to_i)
  end
  book_page = get_book_page(book_uri)
  reread_node.inject(agent, book_page)
end
scrape_users_listing_page(user_id, uri_method, agent = @agent) click to toggle source
# File lib/bookmeter_scraper/scraper.rb, line 356
# Scrapes a user listing (used for followers and for other users'
# followings) into name/link fields.
#
# @param user_id [String] must match USER_ID_REGEX
# @param uri_method [Symbol] name of a BookmeterScraper module method that
#   builds the listing URI for a user id
# @param agent [Object] used for the login check, the page fetch and the
#   Yasuri injection; defaults to @agent
# @return [Array] [] when not logged in, otherwise a one-element Array
#   holding the injected struct with "user_<i>_name" / "user_<i>_link" fields
# @raise [ArgumentError] if +user_id+ or +uri_method+ is not acceptable
# @raise [ScraperError] if +agent+ is nil
def scrape_users_listing_page(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  # Fail with ScraperError like scrape_books_pages does, rather than with a
  # NoMethodError from calling logged_in? on nil.
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  page = agent.get(BookmeterScraper.method(uri_method).call(user_id))
  root = Yasuri.struct_users '//*[@id="main_left"]/div' do
    1.upto(NUM_USERS_PER_PAGE) do |i|
      send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@title")
      send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@href")
    end
  end
  [root.inject(agent, page)]
end