class TextExtractor

Represents an extractor definition.

Constants

STRIP_PROCS
State

Attributes

records[R]
values[R]

Public Class Methods

expand_directives(re) click to toggle source
# File lib/text_extractor/directives.rb, line 7
# Expands the directives in +re+ by delegating to Directives.
#
# @param re the pattern handed to Directives.new
#   (presumably a Regexp -- TODO confirm against Directives)
# @return whatever Directives#expand returns
def self.expand_directives(re)
  expander = Directives.new(re)
  expander.expand
end
new(&block) click to toggle source
# File lib/text_extractor.rb, line 19
# Builds an extractor by evaluating the given block in the context of the
# new instance.
#
# Options and collections are reset first, then the block runs. Afterwards
# every entry collected in @append_guards is registered through #guard: the
# entry is splatted as keyword arguments and its :block entry is passed as
# the block.
#
# @raise [RuntimeError] when no block is given
def initialize(&block)
  raise "#{self.class}.new requires a block" if block.nil?

  initialize_options
  initialize_collections
  instance_exec(&block)
  @append_guards.each do |guard_opts|
    guard(**guard_opts, &guard_opts[:block])
  end
end
version() click to toggle source
# File lib/text_extractor/version.rb, line 2
# Reports the library version.
#
# @return [String] the version string
def self.version
  '0.6.0'
end

Public Instance Methods

append_newline(activate = nil) click to toggle source
# File lib/text_extractor.rb, line 97
# Reads or writes the append-newline setting of this extractor.
#
# With a non-nil argument the setting is stored and returned. With no
# argument the stored setting is returned; when nothing has been stored yet
# the answer comes from TextExtractor.append_newline instead.
#
# @param activate [Object, nil] the new setting, or nil to read
# @return the setting that is in effect
def append_newline(activate = nil)
  # Writer form: any non-nil value (including false) is stored as given.
  return @append_newline = activate unless activate.nil?

  # Reader form: fall back to the class-level setting when unset.
  @append_newline.nil? ? TextExtractor.append_newline : @append_newline
end
boolean(id, re = Patterns::BOOLEAN) click to toggle source
# File lib/text_extractor.rb, line 73
# Declares a value through #value whose captured text is converted to a
# boolean: false when the text matches Patterns::FALSE, true otherwise.
#
# @param id the name of the value
# @param re the pattern for the value (defaults to Patterns::BOOLEAN)
# @return whatever #value returns
def boolean(id, re = Patterns::BOOLEAN)
  value(id, re) do |text|
    !text.match(Patterns::FALSE)
  end
end
factory(object = nil) click to toggle source
# File lib/text_extractor.rb, line 129
# Reads or writes the factory stored on this extractor.
#
# A truthy argument is stored and returned; nil or false leaves the stored
# factory untouched and returns it.
#
# @param object [Object, nil] the factory to store, or nil to read
# @return the stored factory
def factory(object = nil)
  return @factory unless object

  @factory = object
end
filldown(**kwargs, &block) click to toggle source
# File lib/text_extractor.rb, line 137
# Declares a record of class Filldown by forwarding to #record.
#
# @param kwargs keyword options passed through to #record
# @raise [RuntimeError] when no block is given
# @return whatever #record returns
def filldown(**kwargs, &block)
  raise "#{self.class}.filldown requires a block" if block.nil?

  record(Filldown, **kwargs, &block)
end
find_record_for(match) click to toggle source
# File lib/text_extractor.rb, line 143
# Looks up the record whose wrapper group took part in +match+.
#
# Record N is selected when match["__N"] is truthy; the lowest such N wins.
# When no group is truthy the lookup index is nil and indexing +records+
# raises a TypeError.
#
# @param match an object indexable by group name (e.g. MatchData)
# @return the element of +records+ at the first matching position
def find_record_for(match)
  position = (0...records.length).find { |i| match["__#{i}"] }
  records[position]
end
float(id, re = Patterns::FLOAT) click to toggle source
# File lib/text_extractor.rb, line 81
# Declares a value through #value whose captured text is converted with
# Kernel#Float.
#
# @param id the name of the value
# @param re the pattern for the value (defaults to Patterns::FLOAT)
# @return whatever #value returns
def float(id, re = Patterns::FLOAT)
  value(id, re) do |text|
    Float(text)
  end
end
guard(**kwargs, &block) click to toggle source
# File lib/text_extractor.rb, line 147
# Declares a record of class Guard by forwarding to #record.
#
# @param kwargs keyword options passed through to #record
# @raise [RuntimeError] when no block is given
# @return whatever #record returns
def guard(**kwargs, &block)
  raise "#{self.class}.guard requires a block" if block.nil?

  record(Guard, **kwargs, &block)
end
guards(*guard_args) click to toggle source
# File lib/text_extractor.rb, line 153
# Stores the guard definitions to be appended later.
#
# When called without arguments, Guard::DEFAULT is stored instead of an
# empty list.
#
# @param guard_args the guard definitions to remember
# @return the stored list
def guards(*guard_args)
  @append_guards = guard_args.empty? ? Guard::DEFAULT : guard_args
end
initialize_collections() click to toggle source
# File lib/text_extractor.rb, line 36
# Resets every collection held by the extractor to a fresh, empty container.
#
# @values and @fill become empty hashes; @records, @filldowns,
# @current_record_values and @append_guards become empty arrays.
#
# @return [Array] the new, empty @append_guards array
def initialize_collections
  # @values used to be assigned twice; a single assignment is equivalent.
  @values = {}
  @fill = {}
  @records = []
  @filldowns = []
  @current_record_values = []
  @append_guards = []
end
initialize_options() click to toggle source
# File lib/text_extractor.rb, line 28
# Clears every option of the extractor: the factory, the section delimiter
# and terminator, the strip setting and the append-newline setting are all
# set to nil.
#
# @return [nil]
def initialize_options
  @factory = @section_delimiter = @section_terminator = @strip = @append_newline = nil
end
inline(id, &block) click to toggle source
# File lib/text_extractor.rb, line 69
# Registers an InlineValue built from +id+ and the given block under +id+
# in @values, replacing any entry already stored there.
#
# @param id the name of the value
# @return the newly created InlineValue
def inline(id, &block)
  @values.store(id, InlineValue.new(id, &block))
end
integer(id, re = Patterns::INTEGER) click to toggle source
# File lib/text_extractor.rb, line 77
# Declares a value through #value whose captured text is converted with
# Kernel#Integer.
#
# @param id the name of the value
# @param re the pattern for the value (defaults to Patterns::INTEGER)
# @return whatever #value returns
def integer(id, re = Patterns::INTEGER)
  value(id, re) do |text|
    Integer(text)
  end
end
ipaddr(id, re = Patterns::IPADDR) click to toggle source
# File lib/text_extractor.rb, line 89
# Declares a value through #value whose captured text is converted with
# IPAddr.new.
#
# @param id the name of the value
# @param re the pattern for the value (defaults to Patterns::IPADDR)
# @return whatever #value returns
def ipaddr(id, re = Patterns::IPADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end
ipnetaddr(id, re = Patterns::IPNETADDR) click to toggle source
# File lib/text_extractor.rb, line 93
# Declares a value through #value whose captured text is converted with
# IPAddr.new. The conversion is the same as in #ipaddr; only the default
# pattern (Patterns::IPNETADDR) differs.
#
# @param id the name of the value
# @param re the pattern for the value (defaults to Patterns::IPNETADDR)
# @return whatever #value returns
def ipnetaddr(id, re = Patterns::IPNETADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end
rational(id, re = Patterns::RATIONAL) click to toggle source
# File lib/text_extractor.rb, line 85
# Declares a value through #value whose captured text is converted with
# Kernel#Rational.
#
# @param id the name of the value
# @param re the pattern for the value (defaults to Patterns::RATIONAL)
# @return whatever #value returns
def rational(id, re = Patterns::RATIONAL)
  value(id, re) do |text|
    Rational(text)
  end
end
record(klass = Record, **kwargs, &block) click to toggle source
# File lib/text_extractor.rb, line 104
# Declares a record and appends it to @records.
#
# The block is evaluated in the context of this extractor and its result is
# passed as the first argument to klass.new. The keyword options are extended
# with:
# * :extractor_values -- the extractor's +values+
# * :factory          -- @factory, only when one is set and the caller did
#                        not supply a truthy :factory of its own
# * :values           -- a fresh array, also kept in @current_record_values
#
# @param klass the class to instantiate (defaults to Record)
# @param kwargs keyword options forwarded to klass.new
# @raise [RuntimeError] when no block is given
# @return [Array] @records, after the new record has been appended
def record(klass = Record, **kwargs, &block)
  raise "#{self.class}.record requires a block" if block.nil?

  # Must be reset before the block runs so that anything the block appends
  # lands in the list handed to this record.
  @current_record_values = []
  kwargs[:extractor_values] = values
  kwargs[:factory] ||= @factory if @factory
  kwargs[:values] = @current_record_values

  pattern = instance_exec(&block)
  @records << klass.new(pattern, **kwargs)
end
regexps() click to toggle source
# File lib/text_extractor.rb, line 182
# Builds one Regexp per entry of @records.
#
# Each record's source is wrapped in a named group "__N", where N is the
# record's position in @records, and compiled with that record's options.
#
# @return [Array<Regexp>] the wrapped patterns, in record order
def regexps
  @records.each_with_index.map do |rec, index|
    Regexp.new("(?<__#{index}>#{rec.source})", rec.options)
  end
end
scan(input) click to toggle source
# File lib/text_extractor.rb, line 158
# Runs the extractor over +input+ and collects the matches of all sections.
#
# Steps, in order:
# 1. when @strip is set, the input is passed through it;
# 2. when #append_newline is truthy and the text does not already end in
#    "\n", a newline is appended (to a new string);
# 3. the text is split with #sections and each section is scanned by an
#    Extraction; all sections share one prefill hash.
#
# @param input [String] the text to scan
# @return [Array] the concatenated extraction_matches of every section
def scan(input)
  text = @strip ? @strip.call(input) : input
  text += "\n" if append_newline && !text.end_with?("\n")

  prefill = {}
  sections(text).flat_map do |section|
    Extraction.new(section, self, prefill).scan.extraction_matches
  end
end
section(delimiter, terminator = nil) click to toggle source
# File lib/text_extractor.rb, line 113
# Stores the delimiter and the optional terminator used to split input into
# sections.
#
# @param delimiter the value stored in @section_delimiter
# @param terminator the value stored in @section_terminator (nil clears it)
# @return the +terminator+ argument
def section(delimiter, terminator = nil)
  @section_terminator = terminator
  @section_delimiter = delimiter
  terminator
end
sections(input) click to toggle source
# File lib/text_extractor.rb, line 167
# Splits +input+ into sections.
#
# Without a section delimiter the whole input is the only section. With a
# delimiter the input is split on it; when a terminator is set as well, it
# is appended to every resulting piece.
#
# @param input [String] the text to split
# @return [Array<String>] the sections
def sections(input)
  return [input] unless @section_delimiter

  chunks = input.split(@section_delimiter)
  @section_terminator ? chunks.map { |chunk| chunk + @section_terminator } : chunks
end
skip(**kwargs, &block) click to toggle source
# File lib/text_extractor.rb, line 176
# Declares a record of class Skip by forwarding to #record.
#
# @param kwargs keyword options passed through to #record
# @raise [RuntimeError] when no block is given
# @return whatever #record returns
def skip(**kwargs, &block)
  raise "#{self.class}.skip requires a block" if block.nil?

  record(Skip, **kwargs, &block)
end
strip(side = nil) click to toggle source
# File lib/text_extractor.rb, line 124
# Selects the strip behaviour by looking +side+ up in STRIP_PROCS and
# storing the result in @strip.
#
# @param side the key to look up in STRIP_PROCS
# @raise [ArgumentError] when the lookup yields nil or false; @strip is
#   left unchanged in that case
# @return the entry found in STRIP_PROCS
def strip(side = nil)
  handler = STRIP_PROCS[side]
  raise ArgumentError, 'Unknown strip option' unless handler

  @strip = handler
end
to_re() click to toggle source
# File lib/text_extractor.rb, line 188
# Combines all patterns returned by #regexps into one Regexp with
# Regexp.union.
#
# @return [Regexp] the union of the patterns
def to_re
  alternatives = regexps
  Regexp.union(*alternatives)
end
value(id, re, &block) click to toggle source
# File lib/text_extractor.rb, line 61
# Declares a named value and defines a helper method of the same name on
# this extractor.
#
# A Value built from +id+, +re+ and the block is stored in @values under
# +id+. The generated singleton method, when called, appends that Value to
# @current_record_values and returns a named-group pattern string wrapping
# the source of +re+.
#
# @param id [Symbol] the name of the value and of the generated method
# @param re an object responding to #source (e.g. a Regexp)
# @return the result of define_singleton_method
def value(id, re, &block)
  definition = Value.new(id, re, &block)
  @values[id] = definition

  define_singleton_method(id) do
    @current_record_values << definition
    "(?<#{id}>#{re.source})"
  end
end