module Traject::Macros::NokogiriMacros
Public Instance Methods
default_namespaces()
click to toggle source
# File lib/traject/macros/nokogiri_macros.rb, line 5
#
# Returns the namespaces hash configured under the "nokogiri.namespaces"
# setting, falling back to an empty hash when the setting is nil/false.
# The value is validated on first access and memoized in
# @default_namespaces, so later changes to the setting are not picked up.
#
# @return [Hash] the configured namespaces (extract_xpath passes this hash,
#   optionally merged with per-call namespaces, to record.xpath)
# @raise [ArgumentError] if the setting holds something other than a Hash
def default_namespaces
  @default_namespaces ||= begin
    namespaces = settings["nokogiri.namespaces"] || {}
    # Fail early with a descriptive message rather than letting a bad value
    # reach the xpath call; raising here also leaves nothing memoized.
    unless namespaces.is_a?(Hash)
      raise ArgumentError, "nokogiri.namespaces must be a hash, not: #{namespaces.inspect}"
    end
    namespaces
  end
end
extract_xpath(xpath, ns: {}, to_text: true)
click to toggle source
# File lib/traject/macros/nokogiri_macros.rb, line 13
#
# Builds a lambda that evaluates +xpath+ against a record and appends the
# matches to an accumulator.
#
# @param xpath [String] expression handed to record.xpath
# @param ns [Hash, nil] extra namespaces merged over default_namespaces for
#   this extraction; nil or an empty hash means "defaults only"
# @param to_text [Boolean] when truthy each match is reduced to a String,
#   otherwise the matched nodes themselves are accumulated
# @return [Proc] lambda taking (record, accumulator); record must respond to
#   xpath(expression, namespaces) -- presumably a Nokogiri document or node,
#   confirm against callers -- and accumulator must respond to concat
def extract_xpath(xpath, ns: {}, to_text: true)
  # Namespaces are resolved once, when the macro is built, not per record.
  namespaces = default_namespaces
  namespaces = namespaces.merge(ns) if ns && !ns.empty?

  lambda do |record, accumulator|
    matches = record.xpath(xpath, namespaces)

    values =
      if to_text
        matches.map do |node|
          if node.is_a?(Nokogiri::XML::Attr)
            # An attribute match contributes its value.
            node.value
          else
            # Any other match contributes all of its descendant text nodes,
            # joined with single spaces. Text nodes that are entirely
            # whitespace (the formatting "between the children") are dropped;
            # a match with no remaining text still contributes "".
            node.xpath('.//text()')
                .map(&:text)
                .reject { |text| text =~ /\A\s+\z/ }
                .join(" ")
          end
        end
      else
        # Accumulate the matches as they are (Nokogiri::XML::Node objects).
        matches.to_a
      end

    accumulator.concat(values)
  end
end