class Spellr::Tokenizer

Attributes

file[R]
filename[R]
line_tokenizer[R]

Public Class Methods

new(file, start_at: nil, skip_key: true) click to toggle source
# File lib/spellr/tokenizer.rb, line 12
# Sets up a tokenizer over +file+.
#
# @param file an already-open StringIO/IO is used directly; any other value
#   is passed to ::File.new. The value is also stored as-is in @filename.
# @param start_at location to resume from; when not given, a ColumnLocation
#   wrapping LineLocation.new(file) is built instead
# @param skip_key forwarded unchanged to LineTokenizer.new
def initialize(file, start_at: nil, skip_key: true)
  @filename = file
  @start_at = start_at
  @start_at ||= ColumnLocation.new(line_location: LineLocation.new(file))

  stream_given = file.is_a?(StringIO) || file.is_a?(IO)
  @file = stream_given ? file : ::File.new(file)
  # Move the stream to the byte offset recorded in the starting location.
  @file.pos = @start_at.line_location.byte_offset

  # One tokenizer instance is created here and stored for later use.
  @line_tokenizer = LineTokenizer.new('', skip_key: skip_key)
end

Public Instance Methods

each_line_with_stats() { |line, line_number, char_offset, byte_offset| ... } click to toggle source
# File lib/spellr/tokenizer.rb, line 55
# Yields every remaining line of the file together with its line number and
# the character and byte offsets at which the line begins. Numbering and
# offsets start from the values held by @start_at.line_location.
#
# The file is closed when iteration finishes or is interrupted.
#
# @yieldparam line [String] the raw line
# @yieldparam line_number [Integer]
# @yieldparam char_offset [Integer] characters preceding this line
# @yieldparam byte_offset [Integer] bytes preceding this line
def each_line_with_stats # rubocop:disable Metrics/MethodLength
  origin = @start_at.line_location
  chars_seen = origin.char_offset
  bytes_seen = origin.byte_offset

  file.each_line.with_index(origin.line_number) do |text, number|
    yield text, number, chars_seen, bytes_seen

    # Character and byte counts differ for multi-byte text, so both are tracked.
    chars_seen += text.length
    bytes_seen += text.bytesize
  end
ensure
  file.close
end
each_term(&block) click to toggle source
# File lib/spellr/tokenizer.rb, line 29
# Passes +block+ to the line tokenizer's each_term for every line of the
# file. Lines for which prepare_tokenizer_for_line returns nil are skipped.
#
# The file is closed when iteration finishes or is interrupted.
def each_term(&block)
  file.each_line do |text|
    scanner = prepare_tokenizer_for_line(text)
    next if scanner.nil?

    scanner.each_term(&block)
  end
ensure
  file.close
end
each_token(skip_term_proc: nil) { |token| ... } click to toggle source
# File lib/spellr/tokenizer.rb, line 37
# Yields every token the line tokenizer produces, line by line. Before a
# token is yielded its +line+ is set to the result of prepare_line for the
# line it came from. Lines for which prepare_tokenizer_for_line returns nil
# are skipped.
#
# @param skip_term_proc forwarded unchanged to the line tokenizer's each_token
# @yieldparam token the token, with +line+ assigned
def each_token(skip_term_proc: nil) # rubocop:disable Metrics/MethodLength
  each_line_with_stats do |text, number, chars_before, bytes_before|
    scanner = prepare_tokenizer_for_line(text)
    next if scanner.nil?

    scanner.each_token(skip_term_proc: skip_term_proc) do |found|
      # A fresh line object is built for each token rather than shared.
      found.line = prepare_line(text, number, chars_before, bytes_before)

      yield found
    end
  end
end
map(&block) click to toggle source
# File lib/spellr/tokenizer.rb, line 25
# Maps +block+ over every token yielded by each_token and returns the
# collected results.
def map(&block)
  tokens = to_enum(:each_token)
  tokens.map(&block)
end
normalized_terms() click to toggle source
# File lib/spellr/tokenizer.rb, line 69
# Returns the terms from each_term after calling spellr_normalize on each,
# with duplicates removed and the result sorted.
def normalized_terms
  normalized = to_enum(:each_term).map(&:spellr_normalize)
  normalized.uniq.sort
end
prepare_line(line, line_number, char_offset, byte_offset) click to toggle source
# File lib/spellr/tokenizer.rb, line 47
# Wraps a raw line in a Token whose location points at the start of that line.
#
# @param line the raw line text
# @param line_number passed to LineLocation.new as the second argument
# @param char_offset passed to LineLocation.new as +char_offset:+
# @param byte_offset passed to LineLocation.new as +byte_offset:+
# @return a Token built from +line+ and the new ColumnLocation
def prepare_line(line, line_number, char_offset, byte_offset)
  origin = LineLocation.new(
    filename,
    line_number,
    char_offset: char_offset,
    byte_offset: byte_offset
  )

  Token.new(line, location: ColumnLocation.new(line_location: origin))
end
terms() click to toggle source
# File lib/spellr/tokenizer.rb, line 21
# Returns everything yielded by each_term as an array, in the order yielded.
def terms # leftovers:test
  to_enum(:each_term).entries
end

Private Instance Methods

prepare_tokenizer_for_line(line) click to toggle source
# File lib/spellr/tokenizer.rb, line 77
# Points the shared line tokenizer at +line+ and rewinds it to position 0.
#
# @param line the raw line text
# @return the line tokenizer, or nil when +line+ matches
#   Spellr::TokenRegexps::SPELLR_LINE_DISABLE_RE (in which case the tokenizer
#   is left untouched)
def prepare_tokenizer_for_line(line)
  disabled = line.match?(Spellr::TokenRegexps::SPELLR_LINE_DISABLE_RE)
  return if disabled

  line_tokenizer.tap do |scanner|
    scanner.string = line
    scanner.pos = 0
  end
end