class Spellr::LineTokenizer

Constants

BAYES_KEY_HEURISTIC

Attributes

line[R]
skip_key[R]
skip_key?[R]

Public Class Methods

new(line, skip_key: false) click to toggle source
Calls superclass method
# File lib/spellr/line_tokenizer.rb, line 18
# Builds a tokenizer for a single line.
#
# line     - the object to tokenize; kept as-is in @line, and its string
#            form (line.to_s) is what the superclass is given to scan.
# skip_key - stored in @skip_key (defaults to false).
#
# Checking starts out enabled (@disabled is false).
def initialize(line, skip_key: false)
  @disabled = false
  @skip_key = skip_key
  @line = line

  super(line.to_s)
end

Public Instance Methods

each_term() { |term| ... } click to toggle source
# File lib/spellr/line_tokenizer.rb, line 31
# Walks the line until the scanner reports end-of-string, yielding each
# term returned by next_term.
#
# Nothing is yielded for a step that produced no term, nor for terms found
# while @disabled is set.
def each_term
  until eos?
    word = next_term

    yield word if word && !@disabled
  end
end
each_token(skip_term_proc: nil) { |token(term, line: line, location: column_location(term))| ... } click to toggle source
# File lib/spellr/line_tokenizer.rb, line 40
# Walks the line until the scanner reports end-of-string, yielding a Token
# for each term that should be checked.
#
# skip_term_proc - optional callable; when it returns a truthy value for a
#                  term, that term is not yielded. It is only consulted for
#                  real terms found while checking is enabled.
#
# Each yielded Token carries the term, the current line object and the
# term's column location.
def each_token(skip_term_proc: nil) # rubocop:disable Metrics/MethodLength
  until eos?
    word = next_term
    next if !word || @disabled
    next if skip_term_proc&.call(word)

    position = column_location(word)
    yield Token.new(word, line: line, location: position)
  end
end
string=(line) click to toggle source
Calls superclass method
# File lib/spellr/line_tokenizer.rb, line 26
# Replaces the line being tokenized: remembers the new line object in @line
# and hands its string form (line.to_s) to the superclass setter.
def string=(line)
  @line = line

  super(line.to_s)
end

Private Instance Methods

column_location(term) click to toggle source
# File lib/spellr/line_tokenizer.rb, line 52
# Builds the ColumnLocation of a term that has just been scanned.
#
# The scanner is positioned immediately after the term, so the start is the
# current position minus the term's size, computed separately in bytes
# (pos / bytesize) and in characters (charpos / length).
def column_location(term)
  start_byte = pos - term.bytesize
  start_char = charpos - term.length

  ColumnLocation.new(byte_offset: start_byte, char_offset: start_char)
end
key?(possible_key) click to toggle source
# File lib/spellr/line_tokenizer.rb, line 91
# Decides whether possible_key should be treated as a key rather than text.
#
# Returns nil when the candidate is shorter than the configured
# key_minimum_length or does not match min_alpha_re, true when it is longer
# than 200 characters, and otherwise whatever BAYES_KEY_HEURISTIC.key?
# answers.
def key?(possible_key)
  size = possible_key.length
  return if size < Spellr.config.key_minimum_length
  # Anything this long is accepted outright; per the original author, the
  # very large strings reaching this point have been base64.
  return true if size > 200
  # Without the minimum alpha match there is no point asking the heuristic.
  return unless possible_key.match?(min_alpha_re)

  BAYES_KEY_HEURISTIC.key?(possible_key)
end
next_term() click to toggle source
# File lib/spellr/line_tokenizer.rb, line 63
# Advances the scanner by one step.
#
# If the step consumed a non-word or a flag (skip_nonwords_and_flags was
# truthy) there is no term and nil is returned; otherwise the result of
# scan_term is returned.
def next_term
  scan_term unless skip_nonwords_and_flags
end
scan_term() click to toggle source
# File lib/spellr/line_tokenizer.rb, line 69
# Scans TERM_RE at the current position.
#
# Returns the matched term when it is at least the configured
# word_minimum_length long, otherwise nil. A too-short match has still been
# consumed by the scan, so it is effectively skipped.
def scan_term
  candidate = scan(TERM_RE)
  return unless candidate
  return if candidate.length < Spellr.config.word_minimum_length

  candidate
end
skip_and_track_disable() click to toggle source
# File lib/spellr/line_tokenizer.rb, line 100
# While checking is enabled, consumes a SPELLR_DISABLE_RE match at the
# current position and switches checking off.
#
# Returns true when a match was consumed, nil otherwise (including when
# checking is already disabled, in which case nothing is scanned).
def skip_and_track_disable
  return if @disabled
  return unless skip(SPELLR_DISABLE_RE)

  @disabled = true
end
skip_and_track_enable() click to toggle source
# File lib/spellr/line_tokenizer.rb, line 106
# While checking is disabled, consumes a SPELLR_ENABLE_RE match at the
# current position and switches checking back on.
#
# The result is never truthy: it is false (the value assigned to @disabled)
# when a match was consumed and nil otherwise, so a caller chaining this
# with `||` always moves on to its next alternative.
def skip_and_track_enable
  return unless @disabled
  return unless skip(SPELLR_ENABLE_RE)

  @disabled = false
end
skip_key_heuristically() click to toggle source
# File lib/spellr/line_tokenizer.rb, line 79
# When key skipping is switched on, peeks at a POSSIBLE_KEY_RE match at the
# current position and, if key? accepts it, jumps the scanner past it.
#
# Returns the new byte position when a key was skipped, nil otherwise (key
# skipping off, nothing matched, or the candidate was rejected by key?).
def skip_key_heuristically
  return unless skip_key?

  candidate = check(POSSIBLE_KEY_RE)
  return unless candidate && key?(candidate)

  # check does not advance the scanner, so move past the key by hand.
  self.pos = pos + candidate.bytesize
end
skip_nonwords() click to toggle source
# File lib/spellr/line_tokenizer.rb, line 75
# Tries, in order, to consume a SKIPS match, a heuristically detected key,
# and an AFTER_KEY_SKIPS match, stopping at the first attempt that succeeds.
#
# Returns that attempt's truthy result, or the last attempt's falsy result
# when nothing was consumed.
def skip_nonwords
  skipped = skip(SKIPS)
  skipped ||= skip_key_heuristically

  skipped || skip(AFTER_KEY_SKIPS)
end
skip_nonwords_and_flags() click to toggle source
# File lib/spellr/line_tokenizer.rb, line 59
# Tries, in order, to skip non-word content, an enable flag and a disable
# flag, stopping at the first step that reports a truthy result.
#
# Returns that truthy result, or the disable step's result when the first
# two steps were both falsy.
def skip_nonwords_and_flags
  skipped = skip_nonwords
  return skipped if skipped

  enabled = skip_and_track_enable
  return enabled if enabled

  skip_and_track_disable
end