class PxIndexBuilder
Attributes
to_h[R]
to_s[R]
to_xml[R]
Public Class Methods
new(obj, debug: false, ignore: [])
click to toggle source
# File lib/pxindex-builder.rb, line 15
#
# Builds the index from +obj+.
#
# obj    - either a String, which is handed to RXFHelper.read to obtain the
#          raw text, or a Hash, which is passed straight to import_phrases.
#          Any other type is silently ignored.
# debug  - when truthy, progress messages are written with puts.
# ignore - list of words forwarded to import_phrases.
def initialize(obj, debug: false, ignore: [])

  @debug = debug
  puts 'inside initialize: ' if @debug

  case obj
  when String

    raw, _ = RXFHelper.read(obj)

    # text containing a line that starts with '---' is treated as YAML
    # phrases; anything else is treated as an existing index
    if raw =~ /^---/
      # NOTE(review): YAML.load is applied to content read from an external
      # location; depending on the Psych version this may instantiate
      # arbitrary objects. Consider YAML.safe_load if the input is untrusted.
      import_phrases(YAML.load(raw), raw, ignore)
    else
      import_index(raw)
    end

  when Hash

    # NOTE(review): no raw text exists for a Hash, so nil is passed as the
    # phrase source (the original passed an unassigned local, i.e. nil).
    # Confirm that PhraseLookup copes with nil.
    import_phrases obj, nil, ignore

  end

end
Private Instance Methods
import_index(raw_s)
click to toggle source
# File lib/pxindex-builder.rb, line 35
#
# Takes the raw text of an existing index, appends to it every nested entry
# which does not also appear as a top-level entry, and stores the result
# (prefixed with the schema declaration) in @to_s.
#
# raw_s - the index text; it is not modified, String#sub returns a copy.
def import_index(raw_s)

  # remove the first "<...>" tag line (sub only replaces the first match)
  body = raw_s.sub(/<[^>]+>\n/,'')

  doc = LineTree.new(body, debug: @debug).to_doc(encapsulate: true)

  # text of the entries found directly beneath the root
  top_level = doc.root.xpath('entry/text()')
  puts "a: #{top_level.inspect}" if @debug
  puts 'doc: ' + doc.xml if @debug

  # text of the entries nested inside another entry
  nested = doc.root.xpath('entry//entry/text()')
  puts "a2: #{nested.inspect}" if @debug

  # find the entries which aren't on the main index
  missing = nested.map(&:rstrip) - top_level.map(&:rstrip)
  puts "a3:#{missing.inspect}" if @debug

  # add the new entries to the main index
  body << "\n" << missing.join("\n")

  # NOTE(review): the whitespace after "?>" is reproduced as it appears in
  # the extracted source; it may originally have been line breaks - confirm
  # against lib/pxindex-builder.rb.
  @to_s = body.prepend('<?ph schema="entries/section[heading]/entry[title, url]"?> ')

end
import_phrases(h, s, ignore=[])
click to toggle source
# File lib/pxindex-builder.rb, line 59
#
# Builds a letter -> word -> phrase index from a Hash of phrases and stores
# it in @to_h, plus its XML rendering (via PolyrexBuilder) in @to_xml.
#
# h      - Hash whose keys are phrases; the values are compared with #max and
#          used as the sort key for the words (presumably a numeric weight -
#          confirm against callers).
# s      - source handed to PhraseLookup.new.
# ignore - words which must not appear in the index.
def import_phrases(h, s, ignore=[])

  # Split every key into whitespace-separated tokens and keep the first run
  # of word characters of each. A token without any word characters (e.g.
  # "&", or the empty token produced by a key with leading whitespace)
  # yields nil from String#[], so those are dropped before the length checks
  # below, which would otherwise raise NoMethodError on nil.
  candidates = h.keys.join(' ').split(/ +/).map { |x| x[/\w+/] }.compact.uniq

  words = candidates
    .reject { |x| x.length < 3 }
    .reject { |x| ignore.include? x }
    .reject { |x| x.length < 4 && !WordsDotDat.list.include?(x.downcase) }
    .group_by(&:chr).sort

  pl = PhraseLookup.new s

  index = words.map do |letter, list|

    a = list.map do |w|

      # presumably the phrases which contain the word w - confirm against
      # PhraseLookup#q
      phrases = pl.q(w)
      [w, phrases, phrases.map { |x| h[x] }.max]

    end

    # words with the highest value come first
    [letter, a.sort_by(&:last).reverse]

  end

  @to_h = h = scan(index)
  puts 'h: ' + h.inspect if @debug

  @to_xml = PolyrexBuilder.new(h, parents: %i(entry)).to_xml

end
scan(rows)
click to toggle source
# File lib/pxindex-builder.rb, line 90
#
# Recursively converts rows of the form [head, body, ...] into nodes of the
# form [{title: head}] or, when body has at least one truthy element,
# [{title: head}, scan(body)]. A row which is not an Array is used as the
# head on its own.
def scan(rows)

  rows.map do |row|

    # only the first two elements of a row are used
    title, children = row
    node = [{title: title}]

    next node unless children && children.any?

    node << scan(children)

  end

end