module Slaw::Parse::Blocklists

Public Class Methods

adjust_blocklists(doc) click to toggle source
# File lib/slaw/parse/blocklists.rb, line 7
# Post-process the blockLists of +doc+ in two passes: nest_blocklists
# re-nests the lists flagged with a +renest+ attribute, then fix_intros moves
# a +p+ element that directly precedes a blockList into that list as its
# +listIntroduction+.
#
# @param doc the document to adjust; handed unchanged to both passes
# @return the value returned by fix_intros
def self.adjust_blocklists(doc)
  nest_blocklists(doc)
  fix_intros(doc)
end
fix_intros(doc) click to toggle source

Change p tags preceding a blocklist into listIntroductions within the blocklist

# File lib/slaw/parse/blocklists.rb, line 177
# Turn the +p+ node that immediately precedes each blockList into that
# list's +listIntroduction+.
#
# For every +blockList+ in the AKN namespace, the previous sibling is
# inspected; when it is named +p+ it is renamed to +listIntroduction+ and
# moved to become the first child of the blockList. Lists whose previous
# sibling is missing or has any other name are left untouched.
#
# @param doc the document to search; must respond to +xpath+
# @return the collection of blockLists that was iterated
def self.fix_intros(doc)
  doc.xpath('//a:blockList', a: Slaw.akn_namespace).each do |blocklist|
    candidate = blocklist.previous
    next unless candidate && candidate.name == 'p'

    candidate.name = 'listIntroduction'
    blocklist.prepend_child(candidate)
  end
end
guess_number_format(item, prev_format=nil) click to toggle source
# File lib/slaw/parse/blocklists.rb, line 126
# Guess the numbering format of a blocklist item from the text of its number.
#
# @param item the item to classify; must respond to +num+,
#   +previous_element+ and +next_element+
# @param prev_format the format to fall back on for the numbers "(u)", "(v)"
#   and "(x)", which are returned as +prev_format+ unchanged
# @return a NumberingFormat, +prev_format+ for "(u)"/"(v)"/"(x)", or nil when
#   the item has no number
def self.guess_number_format(item, prev_format=nil)
  return nil unless item.num

  before = item.previous_element
  after  = item.next_element

  case item.num
  when "(i)"
    # "(i)" is ambiguous: it is either the letter between (h) and (j)
    #
    #   (h) foo
    #   (i) bar
    #   (j) baz
    #
    # or the first roman numeral of a sublist
    #
    #   (h) foo
    #     (i)  bar
    #     (ii) baz
    #
    # Treat it as a letter only when the previous item starts with "(h" and
    # the next item (if any) is not "(ii)".
    follows_h = before && /^\(h/.match?(before.num)
    opens_roman_run = after && after.num == "(ii)"

    if follows_h && !opens_roman_run
      NumberingFormat.a
    else
      NumberingFormat.i
    end
  when "(u)", "(v)", "(x)" then prev_format
  when /^\([ivx]+/         then NumberingFormat.i
  when /^\([IVX]+/         then NumberingFormat.I
  when /^\([a-z]{2}/       then NumberingFormat.aa
  when /^\([A-Z]{2}/       then NumberingFormat.AA
  when /^\([a-z]+/         then NumberingFormat.a
  when /^\([A-Z]+/         then NumberingFormat.A
  when /^\d+(\.\d+)+$/
    # dotted numbers such as 1.2.3; the second argument is the number of dots
    NumberingFormat.new(:'i.i', item.num.count('.'))
  else
    NumberingFormat.unknown
  end
end
nest_blocklist_items(items, our_number_format, list, prev) click to toggle source

Nest a run of blocklist items, starting with the first element of +items+.

# File lib/slaw/parse/blocklists.rb, line 53
# Walk +items+ from the front and group them into nested blockLists
# according to their guessed numbering format.
#
# For each item (taken off +items+ with +shift+):
# - if its format equals +our_number_format+ it is appended to +list+ (when
#   a list was given) and its +eId+ is rebuilt from the list's +eId+;
# - if its format compares lower than +our_number_format+ it is pushed back
#   onto +items+ and the loop stops, so the calling level picks it up next;
# - otherwise a new +blockList+ is created inside +prev+, the item becomes
#   its first entry, and the method recurses to fill that sublist.
#
# The loop also stops at the first element not named +item+ and at the first
# item for which no format can be guessed.
#
# @param items [Array] remaining item nodes; mutated in place (shift/unshift)
# @param our_number_format the numbering format of the list being filled
# @param list the blockList node that receives same-format items, or nil
#   (nest_blocklists passes nil, in which case such items are left in place)
# @param prev the item node preceding the next item to process, or nil
#   (nest_blocklists passes nil for the top-level call)
def self.nest_blocklist_items(items, our_number_format, list, prev)
  return if items.empty?
  item = items.shift

  # counter for the sublists created by this invocation; used in their eIds
  sublist_count = 1
  number_format = our_number_format

  while item and item.name == 'item'
    # the previous format is passed along so "(u)", "(v)" and "(x)" can
    # inherit it
    number_format = guess_number_format(item, number_format)
    break unless number_format

    # (aa) after (z) is same numbering type, pretend we've always
    # been this format
    if item.num == "(aa)" and item.previous_element and item.previous_element.num == "(z)"
      our_number_format = number_format
    end

    if number_format != our_number_format
      # new sublist, or back to the old list?
      if number_format < our_number_format
        # back to the old list: hand the item back so the calling level
        # processes it
        items.unshift(item)
        break
      else
        # new sublist.
        #
        # The blockList is inserted as a child of the sibling just before
        # +item+, and that sibling's content is moved into the
        # +listIntroduction+ of the new list.
        sublist = item.document.create_element('blockList', eId: prev['eId'] + "__list_#{sublist_count}")
        sublist_count += 1

        # list intro: the element following prev's <num>, if there is one,
        # is renamed and moved into the new list
        num = prev.at_xpath('a:num', a: Slaw.akn_namespace)
        if intro = num.next_element
          intro.name = 'listIntroduction'
          sublist << intro
        end

        # make +item+ the first in this list
        item['eId'] = sublist['eId'] + "__item_#{Slaw::Grammars::Counters.clean(item.num)}"
        sublist << item

        # insert this list as a child of the previous item
        prev << sublist

        # now keep walking item's (old) siblings
        # and pull in those elements that match our numbering
        # scheme
        nest_blocklist_items(items, number_format, sublist, item)
      end
    else
      # same number format

      # if this num is (i), we're numbering in :i, this isn't the first
      # element in this list, then assume we're following (h) with (i)
      if number_format.type == :i && item.num == "(i)" && prev
        # hand the item back and stop; the calling level decides where it goes
        items.unshift(item)
        break
      else
        # keep it with this list
        if list
          list << item
          item['eId'] = list['eId'] + "__item_#{Slaw::Grammars::Counters.clean(item.num)}"
        end
      end
    end

    prev = item
    item = items.shift
  end
end
nest_blocklists(doc) click to toggle source

Correctly re-nest nested block lists that are tagged with the “renest” attribute.

We do this by identifying the numbering format of each item in the list and comparing it with the surrounding elements. When the numbering format changes, we start a new nested list.

We make sure to handle special cases such as `(i)` coming between `(h)` and `(j)` versus being at the start of a `(i), (ii), (iii)` list.

(a)
(b)
(i)
(ii)
(aa)
(bb)
(c)
(d)

becomes

(a)
(b)
  (i)
  (ii)
    (aa)
    (bb)
(c)
(d)

@param doc [Nokogiri::XML::Document] the document

# File lib/slaw/parse/blocklists.rb, line 43
# Re-nest every blockList in +doc+ that carries a +renest+ attribute.
#
# The attribute is removed from each such list. When the list has +item+
# children, the numbering format of the first item is guessed and the items
# are handed to nest_blocklist_items, which performs the actual nesting.
#
# @param doc [Nokogiri::XML::Document] the document
# @return the collection of blockLists that was iterated
def self.nest_blocklists(doc)
  flagged = doc.xpath('//a:blockList[@renest]', a: Slaw.akn_namespace)

  flagged.each do |blocklist|
    blocklist.remove_attribute('renest')

    items = blocklist.xpath('a:item', a: Slaw.akn_namespace)
    next if items.empty?

    queue = items.to_a
    first_format = guess_number_format(items.first)
    # top-level call: no target list and no previous item yet
    nest_blocklist_items(queue, first_format, nil, nil)
  end
end