class TextExtractor
Represents an extractor definition.
Constants
- STRIP_PROCS
- State
Attributes
records[R]
values[R]
Public Class Methods
expand_directives(re)
click to toggle source
# File lib/text_extractor/directives.rb, line 7
# Expands +re+ by handing it to a fresh Directives object.
#
# @param re the pattern passed to Directives.new
# @return the result of Directives#expand
def self.expand_directives(re)
  expander = Directives.new(re)
  expander.expand
end
new(&block)
click to toggle source
# File lib/text_extractor.rb, line 19
# Builds an extractor by evaluating the given block in the context of the
# new instance. Once the block has run, every entry collected in
# @append_guards is turned into a guard record.
#
# @raise [RuntimeError] when no block is given
def initialize(&block)
  raise "#{self.class}.new requires a block" if block.nil?

  initialize_options
  initialize_collections
  instance_exec(&block)
  @append_guards.each do |spec|
    guard(**spec, &spec[:block])
  end
end
version()
click to toggle source
# File lib/text_extractor/version.rb, line 2
# @return [String] the version string of the library
def self.version
  '0.6.0'
end
Public Instance Methods
append_newline(activate = nil)
click to toggle source
# File lib/text_extractor.rb, line 97
# Reads or writes the per-extractor append-newline setting.
#
# With an argument, stores it and returns it. Without one, returns the
# stored setting, falling back to TextExtractor.append_newline when nothing
# has been stored on this instance.
#
# @param activate the new setting, or nil to read the current one
def append_newline(activate = nil)
  return @append_newline = activate unless activate.nil?

  @append_newline.nil? ? TextExtractor.append_newline : @append_newline
end
boolean(id, re = Patterns::BOOLEAN)
click to toggle source
# File lib/text_extractor.rb, line 73
# Declares a value whose captured text is converted to a boolean: the result
# is true unless the text matches Patterns::FALSE.
#
# @param id name of the value
# @param re pattern used to capture the value (defaults to Patterns::BOOLEAN)
def boolean(id, re = Patterns::BOOLEAN)
  value(id, re) do |text|
    text.match(Patterns::FALSE).nil?
  end
end
factory(object = nil)
click to toggle source
# File lib/text_extractor.rb, line 129
# Reads or writes the extractor-wide factory.
#
# @param object the factory to store; a nil/false argument leaves the stored
#   factory untouched
# @return the factory stored after the call
def factory(object = nil)
  @factory = object if object
  @factory
end
filldown(**kwargs, &block)
click to toggle source
# File lib/text_extractor.rb, line 137
# Declares a record of class Filldown; all keyword arguments and the block
# are forwarded to #record.
#
# @raise [RuntimeError] when no block is given
def filldown(**kwargs, &block)
  raise "#{self.class}.filldown requires a block" if block.nil?

  record(Filldown, **kwargs, &block)
end
find_record_for(match)
click to toggle source
# File lib/text_extractor.rb, line 143
# Finds the record whose marker group matched.
#
# Record number i is associated with the named group "__i" (see #regexps);
# the first record whose marker group is present in +match+ is returned.
#
# @param match an object indexable by group name (e.g. a MatchData)
# @return the matching record, or nil when no marker group matched
#   (indexing +records+ with a nil position used to raise TypeError)
def find_record_for(match)
  records.each_with_index do |record, i|
    return record if match["__#{i}"]
  end
  nil
end
float(id, re = Patterns::FLOAT)
click to toggle source
# File lib/text_extractor.rb, line 81
# Declares a value whose captured text is converted with Kernel#Float.
#
# @param id name of the value
# @param re pattern used to capture the value (defaults to Patterns::FLOAT)
def float(id, re = Patterns::FLOAT)
  value(id, re) do |text|
    Float(text)
  end
end
guard(**kwargs, &block)
click to toggle source
# File lib/text_extractor.rb, line 147
# Declares a record of class Guard; all keyword arguments and the block are
# forwarded to #record.
#
# @raise [RuntimeError] when no block is given
def guard(**kwargs, &block)
  raise "#{self.class}.guard requires a block" if block.nil?

  record(Guard, **kwargs, &block)
end
guards(*guard_args)
click to toggle source
# File lib/text_extractor.rb, line 153
# Stores the guard specifications to be appended after the definition block
# has run. When called without arguments, Guard::DEFAULT is stored instead.
#
# @param guard_args guard specifications
# @return the stored specifications
def guards(*guard_args)
  @append_guards = guard_args.empty? ? Guard::DEFAULT : guard_args
end
initialize_collections()
click to toggle source
# File lib/text_extractor.rb, line 36
# Resets every collection the extractor accumulates while a definition block
# is evaluated. Each instance variable receives its own fresh, empty object.
# (@values was previously assigned twice; the redundant assignment is gone.)
def initialize_collections
  @values = {}
  @fill = {}
  @records = []
  @filldowns = []
  @current_record_values = []
  @append_guards = []
end
initialize_options()
click to toggle source
# File lib/text_extractor.rb, line 28
# Resets every option of the extractor (factory, section delimiter and
# terminator, strip handler, append-newline flag) to nil.
def initialize_options
  @factory = @section_delimiter = @section_terminator = @strip = @append_newline = nil
end
inline(id, &block)
click to toggle source
# File lib/text_extractor.rb, line 69
# Registers an InlineValue built from +id+ and the block under +id+ in the
# extractor's values.
#
# @param id name of the value
# @return the InlineValue that was registered
def inline(id, &block)
  inline_value = InlineValue.new(id, &block)
  @values[id] = inline_value
end
integer(id, re = Patterns::INTEGER)
click to toggle source
# File lib/text_extractor.rb, line 77
# Declares a value whose captured text is converted with Kernel#Integer.
#
# @param id name of the value
# @param re pattern used to capture the value (defaults to Patterns::INTEGER)
def integer(id, re = Patterns::INTEGER)
  value(id, re) do |text|
    Integer(text)
  end
end
ipaddr(id, re = Patterns::IPADDR)
click to toggle source
# File lib/text_extractor.rb, line 89
# Declares a value whose captured text is converted with IPAddr.new.
#
# @param id name of the value
# @param re pattern used to capture the value (defaults to Patterns::IPADDR)
def ipaddr(id, re = Patterns::IPADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end
ipnetaddr(id, re = Patterns::IPNETADDR)
click to toggle source
# File lib/text_extractor.rb, line 93
# Declares a value whose captured text is converted with IPAddr.new.
#
# @param id name of the value
# @param re pattern used to capture the value (defaults to Patterns::IPNETADDR)
def ipnetaddr(id, re = Patterns::IPNETADDR)
  value(id, re) do |text|
    IPAddr.new(text)
  end
end
rational(id, re = Patterns::RATIONAL)
click to toggle source
# File lib/text_extractor.rb, line 85
# Declares a value whose captured text is converted with Kernel#Rational.
#
# @param id name of the value
# @param re pattern used to capture the value (defaults to Patterns::RATIONAL)
def rational(id, re = Patterns::RATIONAL)
  value(id, re) do |text|
    Rational(text)
  end
end
record(klass = Record, **kwargs, &block)
click to toggle source
# File lib/text_extractor.rb, line 104
# Declares a record: evaluates the block in the context of the extractor,
# passes its result to +klass.new+ and appends the new object to @records.
#
# @param klass class to instantiate (defaults to Record)
# @param kwargs options forwarded to +klass.new+; :extractor_values and
#   :values are always overwritten, :factory falls back to the extractor-wide
#   factory when one is set
# @raise [RuntimeError] when no block is given
# @return the @records array
def record(klass = Record, **kwargs, &block)
  raise "#{self.class}.record requires a block" if block.nil?

  # Must be reset before the block runs: value methods invoked inside the
  # block append to this very array, which the record receives as :values.
  @current_record_values = []
  kwargs[:extractor_values] = values
  kwargs[:factory] ||= @factory if @factory
  kwargs[:values] = @current_record_values
  pattern = instance_exec(&block)
  @records << klass.new(pattern, **kwargs)
end
regexps()
click to toggle source
# File lib/text_extractor.rb, line 182
# Builds one Regexp per record. Each record's source is wrapped in a named
# group "__<index>" so the record behind a match can be identified later;
# the record's own options are kept.
#
# @return [Array<Regexp>]
def regexps
  @records.each_with_index.map do |rec, index|
    wrapped = "(?<__#{index}>#{rec.source})"
    Regexp.new(wrapped, rec.options)
  end
end
scan(input)
click to toggle source
# File lib/text_extractor.rb, line 158
# Runs the extractor over +input+.
#
# The input is first passed through the strip handler (when one is set) and
# gets a trailing newline when #append_newline is enabled and it does not
# already end with one. It is then split into sections, each section is
# scanned by an Extraction, and the matches of all sections are concatenated.
# One prefill hash is shared by all sections.
#
# @param input [String] text to scan; the caller's string is not modified
#   by the newline handling
def scan(input)
  text = @strip ? @strip.call(input) : input
  text += "\n" if append_newline && !text.end_with?("\n")
  prefill = {}
  sections(text).flat_map do |section|
    Extraction.new(section, self, prefill).scan.extraction_matches
  end
end
section(delimiter, terminator = nil)
click to toggle source
# File lib/text_extractor.rb, line 113
# Configures how input is divided into sections.
#
# @param delimiter separator used to split the input
# @param terminator optional text appended to every section after splitting
# @return the terminator
def section(delimiter, terminator = nil)
  @section_delimiter, @section_terminator = delimiter, terminator
  terminator
end
sections(input)
click to toggle source
# File lib/text_extractor.rb, line 167
# Splits +input+ into sections.
#
# Without a section delimiter the whole input is the only section. With a
# delimiter the input is split on it, and when a section terminator is set
# it is appended to every resulting piece.
#
# @param input [String]
# @return [Array<String>]
def sections(input)
  return [input] unless @section_delimiter

  pieces = input.split(@section_delimiter)
  if @section_terminator
    pieces.map { |piece| piece + @section_terminator }
  else
    pieces
  end
end
skip(**kwargs, &block)
click to toggle source
# File lib/text_extractor.rb, line 176
# Declares a record of class Skip; all keyword arguments and the block are
# forwarded to #record.
#
# @raise [RuntimeError] when no block is given
def skip(**kwargs, &block)
  raise "#{self.class}.skip requires a block" if block.nil?

  record(Skip, **kwargs, &block)
end
strip(side = nil)
click to toggle source
# File lib/text_extractor.rb, line 124
# Selects the strip handler applied to the input before scanning.
#
# @param side key looked up in STRIP_PROCS
# @raise [ArgumentError] when STRIP_PROCS has no truthy entry for +side+
# @return the selected handler
def strip(side = nil)
  handler = STRIP_PROCS[side]
  raise ArgumentError, 'Unknown strip option' unless handler

  @strip = handler
end
to_re()
click to toggle source
# File lib/text_extractor.rb, line 188
# Combines the per-record patterns from #regexps into one alternation.
#
# @return [Regexp]
def to_re
  patterns = regexps
  Regexp.union(patterns)
end
value(id, re, &block)
click to toggle source
# File lib/text_extractor.rb, line 61
# Declares a value and defines a singleton method named +id+ on this
# extractor. Calling that method records the value as used by the record
# currently being defined and returns the source of a named capture group
# wrapping +re+.
#
# @param id name of the value and of the generated method
# @param re [Regexp] pattern captured under the group named +id+
# @param block optional block forwarded to Value.new
def value(id, re, &block)
  definition = Value.new(id, re, &block)
  @values[id] = definition
  define_singleton_method(id) do
    @current_record_values << definition
    "(?<#{id}>#{re.source})"
  end
end