class WonderScrape::Scrapers::MFC::ItemParser

Constants

DUPLICATE_FIELD_NAMES
FIELD_CONTENT_SELECTOR
FIELD_ELEMENTS_SELECTOR
FIELD_NAME_SELECTOR
ID_SELECTOR
TITLE_SELECTOR
VALID_FIELD_NAMES

Attributes

item_html[R]
recorder[R]
writer[R]

Public Class Methods

new(writer, recorder, item_html) click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 53
# Stores the collaborators needed to parse a single item.
#
# @param writer    object that receives the finished result via #write
# @param recorder  object that receives scrape bookkeeping calls
# @param item_html document that is queried with #search by the helpers below
def initialize(writer, recorder, item_html)
  @writer, @recorder, @item_html = writer, recorder, item_html
  # Starts out empty; nothing in the visible methods appends to it.
  @unexpected_fields = []
end
parse(writer, recorder) click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 46
# Builds a callback that parses one item from its raw HTML text.
#
# @param writer   forwarded to .new
# @param recorder forwarded to .new
# @return [Proc] a (non-lambda) proc that takes the HTML text, wraps it with
#   Nokogiri::HTML, builds a new parser around it and returns the result of
#   that parser's #parse
def self.parse(writer, recorder)
  proc { |item_html_text| new(writer, recorder, ::Nokogiri::HTML(item_html_text)).parse }
end

Public Instance Methods

parse() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 60
# Assembles the result hash for this item and hands it to the collaborators.
#
# The hash holds 'Title' first, then every entry from parsed_fields, then
# 'Images'. It is passed to writer.write and afterwards to
# recorder.increment_items_scraped; the return value of the latter is returned.
def parse
  item = { 'Title' => parsed_title }.merge(parsed_fields)
  item['Images'] = parsed_images

  writer.write(item)
  recorder.increment_items_scraped(item)
end

Private Instance Methods

dedupe_field_name(field_name) click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 136
# Translates a field name through DUPLICATE_FIELD_NAMES.
#
# @param field_name the name to look up
# @return the DUPLICATE_FIELD_NAMES entry for field_name when that entry is
#   truthy, otherwise field_name unchanged
def dedupe_field_name(field_name)
  replacement = DUPLICATE_FIELD_NAMES[field_name]
  replacement || field_name
end
field_content_element_for(field_element) click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 140
# Looks up the content part of a single field element.
#
# @param field_element one entry of field_elements
# @return whatever field_element.search yields for FIELD_CONTENT_SELECTOR
def field_content_element_for(field_element)
  content_selector = FIELD_CONTENT_SELECTOR
  field_element.search(content_selector)
end
field_elements() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 128
# Finds all field elements of the item.
#
# @return the result of searching item_html for FIELD_ELEMENTS_SELECTOR
def field_elements
  document = item_html
  document.search(FIELD_ELEMENTS_SELECTOR)
end
field_name_for(field_element) click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 132
# Reads the raw (not yet deduplicated) name of a field element.
#
# @param field_element one entry of field_elements
# @return the #text of what field_element.search yields for FIELD_NAME_SELECTOR
def field_name_for(field_element)
  name_nodes = field_element.search(FIELD_NAME_SELECTOR)
  name_nodes.text
end
id_element() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 120
# Finds the element(s) carrying the item's id.
#
# @return the result of searching item_html for ID_SELECTOR
def id_element
  document = item_html
  document.search(ID_SELECTOR)
end
parsed_fields() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 82
# Collects the item's fields into a hash of field name => parsed value.
#
# Every field name is first passed through dedupe_field_name. A name that
# unexpected_field? rejects is reported via recorder.record_unexpected_field
# (together with parsed_id) and left out of the result. For all other names
# the field's content element is parsed by the FieldParsers class selected
# for that name; names without a dedicated parser use FieldParsers::Standard.
#
# @return [Hash] the parsed fields, in the order they were encountered
def parsed_fields
  field_elements.each_with_object({}) do |element, collected|
    name = dedupe_field_name(field_name_for(element))

    if unexpected_field?(name)
      recorder.record_unexpected_field(parsed_id, name)
      next
    end

    content = field_content_element_for(element)
    parser =
      case name
      when 'Price' then FieldParsers::Price
      when 'Release dates' then FieldParsers::Dates
      when 'Events' then FieldParsers::Events
      when 'Artists', 'Characters', 'Classifications', 'Companies', 'Materials', 'Origins'
        FieldParsers::StandardList
      else
        FieldParsers::Standard
      end

    collected[name] = parser.parse(content)
  end
end
parsed_id() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 74
# Extracts the item's id.
#
# @return the #text of id_element
def parsed_id
  id_nodes = id_element
  id_nodes.text
end
parsed_images() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 113
# Gathers the item's images.
#
# The value from FieldParsers::MainImage comes first, followed by the array
# from FieldParsers::AdditionalImages; both parsers receive item_html.
#
# @return [Array] the combined list with nil entries and duplicates removed
def parsed_images
  main_image = FieldParsers::MainImage.parse(item_html)
  additional_images = FieldParsers::AdditionalImages.parse(item_html)
  ([main_image] + additional_images).compact.uniq
end
parsed_title() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 78
# Extracts the item's title.
#
# @return the #text of title_element
def parsed_title
  title_nodes = title_element
  title_nodes.text
end
title_element() click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 124
# Finds the element(s) carrying the item's title.
#
# @return the result of searching item_html for TITLE_SELECTOR
def title_element
  document = item_html
  document.search(TITLE_SELECTOR)
end
unexpected_field?(field_name) click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 144
# Tells whether a field name falls outside the accepted set.
#
# @param field_name the (already deduplicated) field name
# @return [Boolean] true when VALID_FIELD_NAMES does not include field_name
def unexpected_field?(field_name)
  recognised = VALID_FIELD_NAMES.include?(field_name)
  !recognised
end