module MediaartsScraper::Page::CommonTableParser

Constants

KEY_SEPARATOR

Public Instance Methods

parse_common_key_value_table(table) click to toggle source
# File lib/mediaarts_scraper/page/common_table_parser.rb, line 6
# Parses a key/value style table into a flat Hash.
#
# Every `tbody/tr` row must contain as many `th` cells as `td` cells; the
# stripped text of the n-th `th` becomes the key for the n-th `td`.
#
# A `td` with exactly one direct `p` child uses that paragraph's stripped text
# as its value. Any `div/div/dl` lists inside such a `td` additionally
# contribute one entry per `dt`/`dd` pair, stored under
# `<row key> + KEY_SEPARATOR + <dt text>`. Any other `td` uses its own
# stripped text as the value.
#
# @param table [#xpath] table node; only `xpath` and `text` are called on the
#   nodes (presumably a Nokogiri element -- confirm against callers)
# @return [Hash{String => String}] flattened keys and values, nested `dl`
#   entries inserted before the entry of the cell that contains them
# @raise [ParseError] when a row's `th`/`td` counts differ, or when a `dl`
#   has a different number of `dt` and `dd` children
def parse_common_key_value_table(table)
  result = {}

  table.xpath("tbody/tr").each do |tr|
    ths = tr.xpath("th")
    tds = tr.xpath("td")
    raise ParseError unless ths.count == tds.count

    keys = ths.map(&:text).map(&:strip)

    values = tds.each_with_index.map do |td, i|
      paragraphs = td.xpath("p")
      next td.text.strip unless paragraphs.count == 1

      td.xpath("div/div/dl").each do |dl|
        dts = dl.xpath("dt").map(&:text).map(&:strip)
        dds = dl.xpath("dd").map(&:text).map(&:strip)
        raise ParseError unless dts.count == dds.count

        # Pair each dt with the dd at the same position. Iterating the full
        # cross product would leave every dt mapped to the last dd.
        dts.zip(dds).each do |dt, dd|
          result[keys[i] + KEY_SEPARATOR + dt] = dd
        end
      end

      paragraphs.first.text.strip
    end

    keys.each_with_index do |key, i|
      result[key] = values[i]
    end
  end

  result
end
parse_common_serial_rows_table(table) click to toggle source
# File lib/mediaarts_scraper/page/common_table_parser.rb, line 54
# Converts a table with a header row into one Hash per body row.
#
# Column names are the stripped texts of `thead/tr/th`. For each `tbody/tr`,
# every `td` yields the stripped text of its first child node (or of the `td`
# itself when it has no child), keyed by the column name at the same position.
# When some `td` in the row has a direct `a` child, the `href` attribute value
# of the first such anchor is stored under the "href" key.
#
# @param table [#xpath] table node (presumably a Nokogiri element -- confirm
#   against callers)
# @return [Array<Hash>] one Hash per body row, in document order
def parse_common_serial_rows_table(table)
  column_names = table.xpath("thead/tr/th").map { |th| th.text.strip }

  table.xpath("tbody/tr").map do |row|
    cells = row.xpath("td")

    texts = cells.map do |cell|
      first_child = cell.child
      (first_child || cell).text.strip
    end

    record = column_names.zip(texts).to_h

    # Locate the first direct <a> among the row's cells, scanning left to right.
    anchor = nil
    cells.each do |cell|
      anchor = cell.xpath("a").first
      break if anchor
    end
    record["href"] = anchor.attributes["href"].value if anchor

    record
  end
end