class Ocawari::Strategy::Ameblo

Constants

CSS_HIERARCHY_SELECTORS

Private Instance Methods

entry_id() click to toggle source
# File lib/ocawari/strategy/ameblo.rb, line 46
# Derives the entry identifier from the basename of +uri+ by removing
# the first "entry-" and the first ".html" it contains.
#
# Returns the remaining String.
def entry_id
  file_name = uri.basename
  without_prefix = file_name.sub("entry-", "")
  without_prefix.sub(".html", "")
end
parse() click to toggle source
# File lib/ocawari/strategy/ameblo.rb, line 13
# Collects image URLs from the entry page, rewritten to their
# full-size ("o"-prefixed) variant.
#
# Two sources are tried:
# 1. A <script> tag whose text contains "window.INIT_DATA": the JSON it
#    assigns is parsed and the entry's "entry_text" HTML fragment is
#    searched for <img> tags whose src contains "/user_images/".
# 2. Otherwise, the page nodes matched by CSS_HIERARCHY_SELECTORS whose
#    src looks like a .jpg/.png file.
#
# Nodes without a src attribute are skipped in both paths.
#
# Returns an Array of String URLs (empty when nothing matches).
# Raises JSON::ParserError if the INIT_DATA payload is not valid JSON.
def parse
  script_tag = page.css("script").find { |script| script.text.include?("window.INIT_DATA") }

  if script_tag
    # Scrape JSON: keep only the first statement of the script (everything
    # before the next ";window" assignment) and drop the assignment prefix.
    entry_html = script_tag.text.
      split(";window")[0].
      sub("window.INIT_DATA=", "").
      yield_self { |raw| JSON.parse(raw) }.
      dig("entryState", "entryMap", entry_id, "entry_text")

    # dig returns nil when the entry is absent from the map; coerce to an
    # empty string so the HTML parser is always handed a String.
    Nokogiri::HTML(entry_html.to_s).
      css("img").
      map { |img| img["src"] }.
      # An <img> without src yields nil here; safe navigation skips it
      # instead of raising NoMethodError.
      select { |src| src&.include?("/user_images/") }.
      map { |src| src.sub(/\/t\d+_/, "/o").sub(/\?caw=800/, "") }
  else
    # Scrape HTML
    page.css(CSS_HIERARCHY_SELECTORS.join(", ")).
      map { |node| node["src"] }.
      # Regexp#match? is false for nil, so src-less nodes drop out here.
      select { |src| /\.jpg|\.png/i.match?(src) }.
      map { |src| src.sub(/\/t\d+_/, "/o") }
  end
end