class Spellr::LineTokenizer
Constants
- BAYES_KEY_HEURISTIC
Attributes
line[R]
skip_key[R]
skip_key?[R]
Public Class Methods
new(line, skip_key: false)
click to toggle source
Calls superclass method
# File lib/spellr/line_tokenizer.rb, line 18
# Set up a tokenizer for a single line.
#
# line     - object to tokenize; kept as-is in @line, while its #to_s form
#            is what gets passed to the superclass constructor.
# skip_key - stored in @skip_key (defaults to false).
#
# Tokenizing always starts with @disabled set to false.
def initialize(line, skip_key: false)
  @disabled = false
  @skip_key = skip_key
  @line = line

  super(line.to_s)
end
Public Instance Methods
each_term() { |term| ... }
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 31
# Yield every term found by #next_term until #eos? reports the end.
#
# A term is not yielded when #next_term produced nothing for this step, or
# while @disabled is set.
def each_term
  until eos?
    candidate = next_term
    yield candidate if candidate && !@disabled
  end
end
each_token(skip_term_proc: nil) { |token(term, line: line, location: column_location(term))| ... }
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 40
# Yield a Token for every term found by #next_term until #eos?.
#
# skip_term_proc - optional callable; when given, it is called with the term
#                  and a truthy result suppresses the token.
#
# Terms are also suppressed while @disabled is set. Each yielded Token is
# built from the term, the current #line and the term's #column_location.
def each_token(skip_term_proc: nil) # rubocop:disable Metrics/MethodLength
  until eos?
    candidate = next_term
    next unless candidate

    # The proc is only consulted when tokenizing is not currently disabled.
    suppressed = @disabled || skip_term_proc&.call(candidate)
    next if suppressed

    yield Token.new(candidate, line: line, location: column_location(candidate))
  end
end
string=(line)
click to toggle source
Calls superclass method
# File lib/spellr/line_tokenizer.rb, line 26
# Replace the line being tokenized.
#
# line - the new line object; remembered in @line, and its #to_s form is
#        forwarded to the superclass writer.
def string=(line)
  @line = line
  super(line.to_s)
end
Private Instance Methods
column_location(term)
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 52
# Build a ColumnLocation for a term that has just been scanned.
#
# The offsets are the current #pos / #charpos moved back by the term's byte
# size and character length respectively, i.e. where the term started
# (assumes the scanner is positioned right after +term+).
def column_location(term)
  byte_start = pos - term.bytesize
  char_start = charpos - term.length

  ColumnLocation.new(byte_offset: byte_start, char_offset: char_start)
end
key?(possible_key)
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 91
# Decide whether +possible_key+ should be treated as a key.
#
# Returns nil when the string is shorter than the configured
# key_minimum_length or does not match min_alpha_re, true when it is longer
# than 200 characters, and otherwise whatever BAYES_KEY_HEURISTIC.key?
# answers.
def key?(possible_key)
  key_length = possible_key.length
  return if key_length < Spellr.config.key_minimum_length

  # I've come across some large base64 strings by this point they're definitely base64.
  return true if key_length > 200

  # Without a min_alpha_re match there's no point asking the classifier.
  return unless possible_key.match?(min_alpha_re)

  BAYES_KEY_HEURISTIC.key?(possible_key)
end
next_term()
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 63
# Advance one step: either skip something or scan a term.
#
# Returns nil when #skip_nonwords_and_flags consumed input on this step,
# otherwise the result of #scan_term.
def next_term
  scan_term unless skip_nonwords_and_flags
end
scan_term()
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 69
# Scan TERM_RE at the current position.
#
# Returns the matched text only when it is at least
# Spellr.config.word_minimum_length characters long; otherwise nil.
def scan_term
  scanned = scan(TERM_RE)
  return unless scanned

  scanned if scanned.length >= Spellr.config.word_minimum_length
end
skip_and_track_disable()
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 100
# Consume SPELLR_DISABLE_RE at the current position and set @disabled.
#
# Does nothing (returns nil) when already disabled or when the pattern does
# not match here; returns true once @disabled has been switched on.
def skip_and_track_disable
  return if @disabled || !skip(SPELLR_DISABLE_RE)

  @disabled = true
end
skip_and_track_enable()
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 106
# Consume SPELLR_ENABLE_RE at the current position and clear @disabled.
#
# Does nothing (returns nil) unless currently disabled and the pattern
# matches here. On success the return value is false (the value assigned to
# @disabled), so callers that chain on the result treat it as "not skipped".
def skip_and_track_enable
  return unless @disabled && skip(SPELLR_ENABLE_RE)

  @disabled = false
end
skip_key_heuristically()
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 79
# Jump over text at the current position that #key? classifies as a key.
#
# Only active when skip_key? is set. The candidate is peeked with
# check(POSSIBLE_KEY_RE) and the position is advanced by its byte size only
# if #key? accepts it.
#
# Returns the new position when a key was skipped, nil otherwise.
def skip_key_heuristically
  return unless skip_key?

  candidate = check(POSSIBLE_KEY_RE)
  self.pos += candidate.bytesize if candidate && key?(candidate)
end
skip_nonwords()
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 75
# Try each non-word skipper in turn and stop at the first that succeeds.
#
# Order: SKIPS, then the heuristic key skip, then AFTER_KEY_SKIPS.
# Returns the first truthy result, or the result of the last attempt.
def skip_nonwords
  skipped = skip(SKIPS)
  return skipped if skipped

  skipped = skip_key_heuristically
  return skipped if skipped

  skip(AFTER_KEY_SKIPS)
end
skip_nonwords_and_flags()
click to toggle source
# File lib/spellr/line_tokenizer.rb, line 59
# Skip non-word input, or failing that an enable/disable flag.
#
# Attempts are made in order: #skip_nonwords, #skip_and_track_enable,
# #skip_and_track_disable. Returns the first truthy result, or the result of
# the final attempt.
def skip_nonwords_and_flags
  if (skipped = skip_nonwords)
    skipped
  elsif (enabled = skip_and_track_enable)
    enabled
  else
    skip_and_track_disable
  end
end