class WonderScrape::Scrapers::MFC::ItemParser
Constants
- DUPLICATE_FIELD_NAMES
- FIELD_CONTENT_SELECTOR
- FIELD_ELEMENTS_SELECTOR
- FIELD_NAME_SELECTOR
- ID_SELECTOR
- TITLE_SELECTOR
- VALID_FIELD_NAMES
Attributes
item_html[R]
recorder[R]
writer[R]
Public Class Methods
new(writer, recorder, item_html)
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 53
# Stores the collaborators needed to parse a single item page.
#
# @param writer    object that receives the parsed result through #write
# @param recorder  object told about scraped items and unexpected fields
# @param item_html document for the item; .parse builds it with Nokogiri::HTML
def initialize(writer, recorder, item_html)
  @unexpected_fields = []
  @item_html = item_html
  @recorder = recorder
  @writer = writer
end
parse(writer, recorder)
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 46
# Builds a callable that parses one item page per invocation.
#
# @param writer   handed unchanged to each parser instance
# @param recorder handed unchanged to each parser instance
# @return [Proc] takes the item's HTML text, parses it with Nokogiri::HTML,
#   and runs #parse on a freshly built parser
def self.parse(writer, recorder)
  proc do |item_html_text|
    document = ::Nokogiri::HTML(item_html_text)
    parser = new(writer, recorder, document)
    parser.parse
  end
end
Public Instance Methods
parse()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 60
# Assembles the item's result hash ('Title', the parsed fields, 'Images'),
# hands it to the writer, then reports it to the recorder.
#
# @return whatever recorder.increment_items_scraped returns
def parse
  # 'Title' is inserted first so it stays the leading key after the merge.
  result = { 'Title' => parsed_title }
  result.merge!(parsed_fields)
  result['Images'] = parsed_images

  writer.write(result)
  recorder.increment_items_scraped(result)
end
Private Instance Methods
dedupe_field_name(field_name)
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 136
# Maps a field name through DUPLICATE_FIELD_NAMES, falling back to the
# name itself when the lookup yields nothing truthy.
#
# @param field_name name as read from the page
# @return the mapped name, or field_name unchanged
def dedupe_field_name(field_name)
  replacement = DUPLICATE_FIELD_NAMES[field_name]
  replacement || field_name
end
field_content_element_for(field_element)
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 140
# Searches a field element for its content part.
#
# @param field_element one element yielded by #field_elements
# @return the result of searching field_element with FIELD_CONTENT_SELECTOR
def field_content_element_for(field_element)
  selector = FIELD_CONTENT_SELECTOR
  field_element.search(selector)
end
field_elements()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 128
# Searches the item document for its field elements.
#
# @return the result of searching item_html with FIELD_ELEMENTS_SELECTOR
def field_elements
  selector = FIELD_ELEMENTS_SELECTOR
  item_html.search(selector)
end
field_name_for(field_element)
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 132
# Reads the raw (not yet de-duplicated) name of a field.
#
# @param field_element one element yielded by #field_elements
# @return the text of whatever FIELD_NAME_SELECTOR matches inside the element
def field_name_for(field_element)
  name_nodes = field_element.search(FIELD_NAME_SELECTOR)
  name_nodes.text
end
id_element()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 120
# Searches the item document for the part that carries the item id.
#
# @return the result of searching item_html with ID_SELECTOR
def id_element
  selector = ID_SELECTOR
  item_html.search(selector)
end
parsed_fields()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 82
# Parses every field element of the item into a name => value hash.
#
# Names are first passed through #dedupe_field_name. A name rejected by
# #unexpected_field? is reported to the recorder together with the item id
# and left out of the result. Every other field is parsed by the
# FieldParsers class selected from its name.
#
# @return [Hash] parsed values keyed by field name
def parsed_fields
  field_elements.each_with_object({}) do |element, collected|
    name = dedupe_field_name(field_name_for(element))

    if unexpected_field?(name)
      recorder.record_unexpected_field(parsed_id, name)
    else
      content = field_content_element_for(element)
      collected[name] =
        case name
        when 'Price'
          FieldParsers::Price.parse(content)
        when 'Release dates'
          FieldParsers::Dates.parse(content)
        when 'Events'
          FieldParsers::Events.parse(content)
        when 'Artists', 'Characters', 'Classifications', 'Companies', 'Materials', 'Origins'
          FieldParsers::StandardList.parse(content)
        else
          FieldParsers::Standard.parse(content)
        end
    end
  end
end
parsed_id()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 74
# Reads the item id from the document.
#
# @return the text of #id_element
def parsed_id
  id_nodes = id_element
  id_nodes.text
end
parsed_images()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 113
# Collects the item's images: the main image first, then the additional
# ones, with nil entries and duplicates removed.
#
# @return [Array] images in first-seen order
def parsed_images
  main_image = FieldParsers::MainImage.parse(item_html)
  additional_images = FieldParsers::AdditionalImages.parse(item_html)

  [main_image].concat(additional_images).compact.uniq
end
parsed_title()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 78
# Reads the item title from the document.
#
# @return the text of #title_element
def parsed_title
  title_nodes = title_element
  title_nodes.text
end
title_element()
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 124
# Searches the item document for the part that carries the title.
#
# @return the result of searching item_html with TITLE_SELECTOR
def title_element
  selector = TITLE_SELECTOR
  item_html.search(selector)
end
unexpected_field?(field_name)
click to toggle source
# File lib/wonder_scrape/scrapers/mfc/item_parser.rb, line 144
# Tells whether a field name is missing from VALID_FIELD_NAMES.
#
# @param field_name name after de-duplication
# @return [Boolean] true when the name is not listed in VALID_FIELD_NAMES
def unexpected_field?(field_name)
  recognised = VALID_FIELD_NAMES.include?(field_name)
  !recognised
end