class TextExtractor::Record

Attributes

factory[R]
regexp[R]
values[R]

Public Class Methods

new( regexp, factory: nil, values: [], fill: [], directives: true, inline: [], extractor_values: {}, **_kwargs ) click to toggle source
# File lib/text_extractor/record.rb, line 7
# Builds a record around +regexp+.
#
# @param regexp the pattern for this record; handed to build_regexp
# @param factory passed to FactoryAnalyzer to derive the constructor proc
# @param values objects responding to +id+; indexed by that id
# @param fill one key or a list of keys (coerced with Array())
# @param directives whether build_regexp should expand directives
# @param inline names registered through initialize_inline_values
# @param extractor_values lookup table consulted before creating new values
# @param _kwargs any other keyword arguments are accepted and ignored
def initialize(
  regexp,
  factory: nil,
  values: [],
  fill: [],
  directives: true,
  inline: [],
  extractor_values: {},
  **_kwargs
)
  @factory = factory
  @constructor = FactoryAnalyzer.new(factory).to_proc
  @fill = Array(fill)
  @extractor_values = extractor_values

  # Index the explicit values by id and seed a nil default for each of them.
  @values = {}
  @default_values = {}
  values.each do |value|
    @values[value.id] = value
    @default_values[value.id] = nil
  end

  # Both calls below read @values / @extractor_values, so they must run
  # after the tables above are in place.
  initialize_inline_values(inline)
  @regexp = build_regexp(regexp, directives)
end

Public Instance Methods

build_extraction(extracted) click to toggle source
# File lib/text_extractor/record.rb, line 35
# Turns the extracted hash into the final result object.
#
# When a constructor proc is present it is called with +extracted+ and its
# result is returned; otherwise +extracted+ is returned untouched.
def build_extraction(extracted)
  if @constructor
    @constructor.call(extracted)
  else
    extracted
  end
end
build_regexp(regexp, directives) click to toggle source
# File lib/text_extractor/record.rb, line 41
# Produces the final pattern for this record: strips the regexp via
# strip_regexp, then expands it via expand_regexp.
#
# @raise [EmptyRecordError] if the resulting pattern matches the empty string
# @return the expanded pattern
def build_regexp(regexp, directives)
  expand_regexp(strip_regexp(regexp), directives).tap do |pattern|
    # A pattern that matches '' is treated as an empty record and rejected.
    raise EmptyRecordError, 'Empty record detected' if pattern.match('')
  end
end
expand_regexp(regexp, directives) click to toggle source
# File lib/text_extractor/record.rb, line 60
# Expands directives in +regexp+ when +directives+ is truthy; otherwise
# returns +regexp+ unchanged.
#
# Every value reported by the Directives expander is registered in +values+
# under its id, preferring an entry from @extractor_values when one exists.
def expand_regexp(regexp, directives)
  return regexp unless directives

  expander = Directives.new(regexp)
  # expand is invoked before values is read, as in the original ordering.
  result = expander.expand
  expander.values.each do |directive_value|
    id = directive_value.id
    values[id] = @extractor_values.fetch(id, directive_value)
  end
  result
end
extract_fills(fill) click to toggle source
# File lib/text_extractor/record.rb, line 85
# Picks this record's fill keys (@fill) out of +fill+.
#
# @return [Hash] one entry per key in @fill, mapped to whatever
#   fill.values_at yields for that key
def extract_fills(fill)
  carried = fill.values_at(*@fill)
  @fill.each_with_index.map { |name, idx| [name, carried[idx]] }.to_h
end
extract_values(match) click to toggle source
# File lib/text_extractor/record.rb, line 89
# Converts the captures of +match+ for every registered value.
#
# @return [Hash] value id => result of that value's +convert+ applied to
#   match[id]
def extract_values(match)
  values.each_with_object({}) do |(id, value), converted|
    converted[id] = value.convert(match[id])
  end
end
extraction(match, fill) click to toggle source

@return [Array] a one-element array containing the built extraction

# File lib/text_extractor/record.rb, line 28
# Assembles the extraction for a successful match.
#
# Layers, lowest priority first: the nil defaults, the fill values, then the
# values converted from +match+. The merged hash is passed through
# build_extraction.
#
# @return [Array] a single-element array holding the built extraction
def extraction(match, fill)
  merged = @default_values
           .merge(extract_fills(fill))
           .merge(extract_values(match))
  [build_extraction(merged)]
end
initialize_inline_values(inline_values) click to toggle source
# File lib/text_extractor/record.rb, line 93
# Registers each inline value name in @values.
#
# An entry already present in @extractor_values under that name is reused;
# otherwise a new InlineValue is created for it.
def initialize_inline_values(inline_values)
  inline_values.each do |name|
    resolved = @extractor_values.fetch(name) { InlineValue.new(name) }
    @values[name] = resolved
  end
end
match(string, pos = 0) click to toggle source
# File lib/text_extractor/record.rb, line 73
# Matches this record's pattern against +string+, starting at offset +pos+.
#
# Delegates to @regexp.match(string, pos) and returns its result unchanged
# (presumably a MatchData or nil, assuming @regexp is a Regexp).
def match(string, pos = 0)
  @regexp.match(string, pos)
end
options() click to toggle source
# File lib/text_extractor/record.rb, line 81
# Returns the options of this record's built pattern (@regexp.options).
def options
  @regexp.options
end
source() click to toggle source
# File lib/text_extractor/record.rb, line 77
# Returns the source of this record's built pattern (@regexp.source), i.e.
# the pattern as produced by build_regexp rather than as originally given.
def source
  @regexp.source
end
strip_regexp(regexp) click to toggle source
# File lib/text_extractor/record.rb, line 50
# Removes the surrounding blank lines' indentation from a multi-line regexp.
#
# When the first and last lines of the regexp source are both whitespace-only,
# the first line is dropped and the last line's whitespace is treated as the
# indentation prefix: it is removed from the START of every remaining line
# (which leaves the last line empty). Whitespace inside a line, and any
# indentation beyond the prefix, is preserved. Otherwise the source is kept
# as is.
#
# @param regexp [Regexp] the pattern to strip
# @return [Regexp] a new regexp with the same options
def strip_regexp(regexp)
  lines = regexp.source.split("\n")
  prefix = lines.last
  if lines.first =~ /\A\s*\z/ && prefix =~ /\A\s*\z/
    lines.shift
    # Anchor to the line start: a plain String#gsub(prefix, '') would also
    # delete every interior occurrence of the indentation string.
    indent = /\A#{Regexp.escape(prefix)}/
    lines = lines.map { |line| line.sub(indent, '') }
  end
  Regexp.new(lines.join("\n"), regexp.options)
end