class TextExtractor::Directives

Directives can only be named with lowercase ascii letters (a-z) and _ (underscore).

Directives can take an argument. An argument can contain any sequence of characters other than newlines, parenthesis, or dot (.). The argument appears after the name, in parenthesis, with no whitespace between the name and left parenthesis. Whitespace inside the parenthesis is taken literally and not ignored.

When used, each directive name is preceeded by a dot (.). There should be no whitespace on either side of the dot. Some directives can be chained one after another, still using a dot to separate the earlier directive from the later one.

Constants

DIRECTIVE_MAP

Public Class Methods

new(original) click to toggle source
# File lib/text_extractor/directives.rb, line 25
def initialize(original)
  @source = original.source
  @options = original.options
  @output = nil
  @directives = []
end

Public Instance Methods

expand() click to toggle source
# File lib/text_extractor/directives.rb, line 32
def expand
  return @output if @output

  @state = State.new
  scanner = StringScanner.new(@source)
  read_line(scanner) until scanner.eos?
  raise 'Unterminated line group' unless @state.groups.empty?

  @output = Regexp.new(@state.target.join(''), @options)
end
values() click to toggle source
# File lib/text_extractor/directives.rb, line 43
def values
  @directives.flat_map(&:values)
end

Private Instance Methods

add_line() click to toggle source
# File lib/text_extractor/directives.rb, line 73
def add_line
  apply_directives read_directives
  return unless @state.current

  if @state.groups.empty?
    @state.target << @state.current
  else
    @state.groups.last << @state.current
  end
end
apply_directives(directives) click to toggle source
# File lib/text_extractor/directives.rb, line 96
def apply_directives(directives)
  directives.each(&:call)
end
parse_arguments(rule, source) click to toggle source
# File lib/text_extractor/directives.rb, line 122
def parse_arguments(rule, source)
  return [] unless rule
  return rule.call(source) if rule.is_a?(Proc)

  source.match(/\(([^)]*)\)/) { |md| md[1] }
end
parse_directives(full_source) click to toggle source
# File lib/text_extractor/directives.rb, line 100
def parse_directives(full_source)
  return [Comment.new(@state)] if full_source.start_with?(' ')

  split_directives(full_source)
    .map { |source| parse_one_directive(source) }
    .each { |directive| @directives << directive }
end
parse_one_directive(source) click to toggle source
# File lib/text_extractor/directives.rb, line 108
def parse_one_directive(source)
  md = source.match(/^[a-z_]+/) || source.match(/^ /)
  raise "Unknown directive(s) in #{@state.current_line}" unless md

  word = md[0]
  map = DIRECTIVE_MAP.fetch(word) { raise "Unknown directive #{word}" }
  args = parse_arguments(map[:arguments], md.post_match)
  map.fetch(:class).new(@state, *args)
end
read_directives() click to toggle source
# File lib/text_extractor/directives.rb, line 84
def read_directives
  md = @state.current_line.match(/(^| )#\./)

  if md
    @state.current = md.pre_match
    @state.current += "\n" if @state.newline?
    parse_directives(md.post_match.rstrip)
  else
    []
  end
end
read_line(scanner) click to toggle source
# File lib/text_extractor/directives.rb, line 61
def read_line(scanner)
  line = scanner.scan_until(/\n/)

  unless line
    line = scanner.rest
    scanner.skip(/.*/)
  end

  @state.current = @state.current_line = line
  add_line
end
split_directives(source) click to toggle source
# File lib/text_extractor/directives.rb, line 118
def split_directives(source)
  source.split('.')
end