class TextExtractor::Record
Attributes
factory[R]
regexp[R]
values[R]
Public Class Methods
new( regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, **_kwargs )
click to toggle source
# File lib/text_extractor/record.rb, line 7
# Sets up a record: stores the factory and its constructor proc, indexes the
# supplied values by id, registers inline values, and builds the final regexp.
#
# @param regexp the pattern; it is passed through build_regexp before being stored
# @param factory handed to FactoryAnalyzer, whose #to_proc becomes the constructor
# @param values objects responding to #id; stored in @values keyed by that id
# @param fill key(s) coerced with Array() and stored in @fill
# @param directives forwarded unchanged to build_regexp
# @param inline ids registered through initialize_inline_values
# @param extractor_values lookup stored in @extractor_values and consulted when
#   inline and directive values are registered
# @param _kwargs any further keyword arguments are accepted and ignored
def initialize(
  regexp,
  factory: nil,
  values: [],
  fill: [],
  directives: true,
  inline: [],
  extractor_values: {},
  **_kwargs
)
  @factory = factory
  @constructor = FactoryAnalyzer.new(factory).to_proc
  @extractor_values = extractor_values

  # `values` here is the keyword argument (an enumerable), not the reader.
  @values = values.each_with_object({}) { |value, by_id| by_id[value.id] = value }
  initialize_inline_values(inline)

  # Defaults are derived from the keyword argument only, so ids added by
  # inline values or directives get no entry here.
  @default_values = values.each_with_object({}) { |value, defaults| defaults[value.id] = nil }

  # Must run after @values and @extractor_values exist: expanding
  # directives writes into the values hash.
  @regexp = build_regexp(regexp, directives)
  @fill = Array(fill)
end
Public Instance Methods
build_extraction(extracted)
click to toggle source
# File lib/text_extractor/record.rb, line 35
# Turns the hash of extracted values into the record's final result.
#
# @param extracted the merged extraction data
# @return the result of calling @constructor with +extracted+, or
#   +extracted+ itself when no constructor is set
def build_extraction(extracted)
  @constructor ? @constructor.call(extracted) : extracted
end
build_regexp(regexp, directives)
click to toggle source
# File lib/text_extractor/record.rb, line 41
# Produces the regexp stored on the record: the input is first passed through
# strip_regexp, then through expand_regexp.
#
# @param regexp the pattern as written by the caller
# @param directives forwarded to expand_regexp
# @return the stripped and expanded pattern
# @raise [EmptyRecordError] when the resulting pattern matches the empty string
def build_regexp(regexp, directives)
  candidate = expand_regexp(strip_regexp(regexp), directives)

  # A pattern that matches '' is rejected as an empty record.
  raise EmptyRecordError, 'Empty record detected' if candidate =~ ''

  candidate
end
expand_regexp(regexp, directives)
click to toggle source
# File lib/text_extractor/record.rb, line 60
# Optionally runs the pattern through Directives and registers every value the
# expander reports.
#
# @param regexp the pattern to expand
# @param directives when falsy the pattern is returned untouched
# @return the expanded pattern, or +regexp+ itself when directives are disabled
def expand_regexp(regexp, directives)
  return regexp unless directives

  expander = Directives.new(regexp)
  expanded = expander.expand # called before expander.values is read

  # An entry in @extractor_values with the same id takes precedence over the
  # value reported by the expander.
  expander.values.each do |directive_value|
    id = directive_value.id
    values[id] = @extractor_values.fetch(id, directive_value)
  end

  expanded
end
extract_fills(fill)
click to toggle source
# File lib/text_extractor/record.rb, line 85
# Picks the entries named in @fill out of +fill+.
#
# @param fill an object responding to #values_at
# @return [Hash] each key in @fill mapped to the value looked up for it
def extract_fills(fill)
  looked_up = fill.values_at(*@fill)

  @fill.each_with_index.map { |key, position| [key, looked_up[position]] }.to_h
end
extract_values(match)
click to toggle source
# File lib/text_extractor/record.rb, line 89
# Converts the captures of a match using the registered values.
#
# @param match an object indexable by value id (looked up as match[id])
# @return [Hash] each registered id mapped to the result of calling #convert
#   on its value with match[id]
def extract_values(match)
  values.each_with_object({}) do |(id, value), converted|
    converted[id] = value.convert(match[id])
  end
end
extraction(match, fill)
click to toggle source
@return Array
# File lib/text_extractor/record.rb, line 28
# Assembles the extraction for one match.
#
# Sources are layered so later ones override earlier ones: defaults first,
# then fills, then the converted match values.
#
# @param match passed to extract_values
# @param fill passed to extract_fills
# @return [Array] a one-element array holding the result of build_extraction
def extraction(match, fill)
  extracted = {}
  layers = [@default_values, extract_fills(fill), extract_values(match)]
  layers.each { |layer| extracted.update(layer) }

  [build_extraction(extracted)]
end
initialize_inline_values(inline_values)
click to toggle source
# File lib/text_extractor/record.rb, line 93
# Registers each inline id in @values.
#
# An entry in @extractor_values under the same id is used when present;
# otherwise a new InlineValue is created for the id.
#
# @param inline_values the ids to register
def initialize_inline_values(inline_values)
  inline_values.each do |id|
    @values[id] = @extractor_values.fetch(id) { InlineValue.new(id) }
  end
end
match(string, pos = 0)
click to toggle source
# File lib/text_extractor/record.rb, line 73
# Matches the record's regexp against a string.
#
# @param string the text to search
# @param pos offset handed to Regexp#match as the starting position
# @return whatever @regexp.match returns for these arguments
def match(string, pos = 0)
  @regexp.match(string, pos)
end
options()
click to toggle source
# File lib/text_extractor/record.rb, line 81
# Exposes the options of the record's regexp.
#
# @return the value of @regexp.options
def options
  @regexp.options
end
source()
click to toggle source
# File lib/text_extractor/record.rb, line 77
# Exposes the source text of the record's regexp.
#
# @return the value of @regexp.source
def source
  @regexp.source
end
strip_regexp(regexp)
click to toggle source
# File lib/text_extractor/record.rb, line 50
# Removes the framing of a regexp written across several lines.
#
# When the first and last lines of the source are both whitespace-only, the
# first line is dropped and the last line's whitespace is treated as the
# indentation to remove from the start of every remaining line. The last line
# itself therefore becomes empty, leaving a trailing newline in the result.
# Any other pattern is rebuilt unchanged.
#
# Only the leading occurrence of the indentation is removed. Removing every
# occurrence (as a plain gsub would) also deletes interior whitespace runs
# and deeper relative indentation, which changes what the pattern matches.
#
# @param regexp [Regexp] the pattern to strip
# @return [Regexp] a new regexp with the same options
def strip_regexp(regexp)
  blank = /\A\s*\z/
  lines = regexp.source.split("\n")
  indent = lines.last

  # `blank =~ nil` is nil, so an empty source falls straight through.
  if blank =~ lines.first && blank =~ indent
    lines.shift
    lines = lines.map do |line|
      line.start_with?(indent) ? line[indent.length..-1] : line
    end
  end

  Regexp.new(lines.join("\n"), regexp.options)
end