class Ocawari::Strategy::Ameblo
Constants
- CSS_HIERARCHY_SELECTORS
Private Instance Methods
entry_id()
click to toggle source
# File lib/ocawari/strategy/ameblo.rb, line 46
#
# Derives the entry identifier from the basename of the page URI by removing
# a leading "entry-" and a trailing ".html". #parse uses the result as the
# key into the "entryMap" hash of the page's embedded JSON.
#
# Both removals are anchored (prefix / suffix only), so a basename that merely
# contains "entry-" or ".html" somewhere in the middle is left intact there.
#
# NOTE(review): `uri` is supplied outside this block; presumably its
# basename looks like "entry-12345.html" — confirm against the caller.
#
# @return [String] the basename without the "entry-" prefix and ".html" suffix
def entry_id
  uri.basename.delete_prefix("entry-").delete_suffix(".html")
end
parse()
click to toggle source
# File lib/ocawari/strategy/ameblo.rb, line 13
#
# Collects image URLs from an Ameblo entry page, rewriting thumbnail paths
# ("/t<digits>_...") to their "/o..." counterpart.
#
# Two page layouts are handled:
#
# * When a <script> containing "window.INIT_DATA" is present, the JSON
#   assigned to it is parsed, the entry's HTML is read from
#   entryState -> entryMap -> <entry_id> -> entry_text, and every <img> whose
#   src contains "/user_images/" is kept. A "?caw=800" query is stripped.
# * Otherwise the images matched by CSS_HIERARCHY_SELECTORS are read straight
#   from the page and those whose src contains ".jpg" or ".png" (any case)
#   are kept.
#
# <img> elements without a src attribute are skipped in both branches.
#
# NOTE(review): `page` is supplied outside this block; presumably a parsed
# Nokogiri document — confirm against the caller.
#
# @return [Array<String>] image URLs in document order
# @raise [JSON::ParserError] if the INIT_DATA payload is not valid JSON
#   (not rescued here)
def parse
  init_script = page.css("script").find { |script| script.text.include?("window.INIT_DATA") }

  if init_script
    # Scrape JSON
    # The payload is the first `;window`-delimited statement of the script,
    # i.e. everything between "window.INIT_DATA=" and the next assignment.
    entry_html = init_script.text.
      split(";window")[0].
      sub("window.INIT_DATA=", "").
      yield_self { |raw| JSON.parse(raw) }.
      dig("entryState", "entryMap", entry_id, "entry_text")

    Nokogiri::HTML(entry_html).css("img").
      map { |img| img["src"] }.
      # `&.` guards against <img> tags that carry no src attribute at all.
      select { |src| src&.include?("/user_images/") }.
      map { |src| src.sub(%r{/t\d+_}, "/o").sub(/\?caw=800/, "") }
  else
    # Scrape HTML
    page.css(CSS_HIERARCHY_SELECTORS.join(", ")).
      map { |node| node["src"] }.
      # Regexp#match? returns false for nil, so src-less nodes drop out here.
      select { |src| /\.jpg|\.png/i.match?(src) }.
      map { |src| src.sub(%r{/t\d+_}, "/o") }
  end
end