class Spellr::Tokenizer
Attributes
file[R]
filename[R]
line_tokenizer[R]
Public Class Methods
new(file, start_at: nil, skip_key: true)
click to toggle source
# File lib/spellr/tokenizer.rb, line 12
# Builds a tokenizer over +file+.
#
# file     - an already-open IO/StringIO, or anything accepted by ::File.new.
# start_at - location to begin reading from; when nil/false a ColumnLocation
#            wrapping a fresh LineLocation for +file+ is created.
# skip_key - forwarded unchanged to the LineTokenizer.
def initialize(file, start_at: nil, skip_key: true)
  @filename = file

  @start_at = start_at
  @start_at ||= ColumnLocation.new(line_location: LineLocation.new(file))

  # IO-like arguments are used as given; anything else is opened as a file.
  already_open = file.is_a?(StringIO) || file.is_a?(IO)
  @file = already_open ? file : ::File.new(file)

  # Position the stream at the byte offset of the starting location.
  @file.pos = @start_at.line_location.byte_offset

  @line_tokenizer = LineTokenizer.new('', skip_key: skip_key)
end
Public Instance Methods
each_line_with_stats() { |line, line_number, char_offset, byte_offset| ... }
click to toggle source
# File lib/spellr/tokenizer.rb, line 55
# Reads the file line by line and yields, for each line:
# the line itself, its line number, and the character and byte offsets at
# which the line begins. Numbering and offsets continue from the starting
# location. The file is closed when iteration finishes or is interrupted.
def each_line_with_stats # rubocop:disable Metrics/MethodLength
  origin = @start_at.line_location
  chars_so_far = origin.char_offset
  bytes_so_far = origin.byte_offset

  file.each_line.with_index(origin.line_number) do |text, number|
    yield text, number, chars_so_far, bytes_so_far

    # Advance the running offsets past the line that was just yielded.
    chars_so_far += text.length
    bytes_so_far += text.bytesize
  end
ensure
  file.close
end
each_term(&block)
click to toggle source
# File lib/spellr/tokenizer.rb, line 29
# Passes +block+ to the line tokenizer's each_term for every line of the
# file, skipping lines for which no tokenizer is prepared. The file is
# closed afterwards.
def each_term(&block)
  file.each_line do |text|
    tokenizer = prepare_tokenizer_for_line(text)
    next if tokenizer.nil?

    tokenizer.each_term(&block)
  end
ensure
  file.close
end
each_token(skip_term_proc: nil) { |token| ... }
click to toggle source
# File lib/spellr/tokenizer.rb, line 37
# Yields every token found in the file. Before a token is yielded its
# +line+ is set to the result of prepare_line for the line it came from.
# Lines for which no tokenizer is prepared are skipped.
#
# skip_term_proc - forwarded unchanged to the line tokenizer's each_token.
def each_token(skip_term_proc: nil) # rubocop:disable Metrics/MethodLength
  each_line_with_stats do |text, number, chars, bytes|
    tokenizer = prepare_tokenizer_for_line(text)
    next if tokenizer.nil?

    tokenizer.each_token(skip_term_proc: skip_term_proc) do |token|
      token.line = prepare_line(text, number, chars, bytes)
      yield token
    end
  end
end
map(&block)
click to toggle source
# File lib/spellr/tokenizer.rb, line 25
# Maps +block+ over every token produced by each_token and returns the
# collected results.
def map(&block)
  tokens = to_enum(:each_token)
  tokens.map(&block)
end
normalized_terms()
click to toggle source
# File lib/spellr/tokenizer.rb, line 69
# Returns the sorted, de-duplicated list obtained by calling
# spellr_normalize on every term produced by each_term.
def normalized_terms
  raw_terms = to_enum(:each_term)
  normalized = raw_terms.map(&:spellr_normalize)
  distinct = normalized.uniq
  distinct.sort
end
prepare_line(line, line_number, char_offset, byte_offset)
click to toggle source
# File lib/spellr/tokenizer.rb, line 47
# Wraps a whole line in a Token whose location is a ColumnLocation built
# around a LineLocation for this tokenizer's filename and the given line
# number, character offset and byte offset.
def prepare_line(line, line_number, char_offset, byte_offset)
  Token.new(
    line,
    location: ColumnLocation.new(
      line_location: LineLocation.new(
        filename, line_number,
        char_offset: char_offset, byte_offset: byte_offset
      )
    )
  )
end
terms()
click to toggle source
# File lib/spellr/tokenizer.rb, line 21
# Collects everything yielded by each_term into an array.
def terms # leftovers:test
  collected = to_enum(:each_term)
  collected.to_a
end
Private Instance Methods
prepare_tokenizer_for_line(line)
click to toggle source
# File lib/spellr/tokenizer.rb, line 77
# Points the shared line tokenizer at +line+ and rewinds it to position 0.
# Returns the tokenizer, or nil (leaving the tokenizer untouched) when the
# line matches Spellr::TokenRegexps::SPELLR_LINE_DISABLE_RE.
def prepare_tokenizer_for_line(line)
  disabled = line.match?(Spellr::TokenRegexps::SPELLR_LINE_DISABLE_RE)
  return if disabled

  line_tokenizer.tap do |tokenizer|
    tokenizer.string = line
    tokenizer.pos = 0
  end
end