class PerseusMatch::TokenSet
Attributes
form[R]
tokens[R]
Public Class Methods
file?(form)
click to toggle source
# File lib/perseus_match/token_set.rb, line 146
#
# Returns the path +form+ refers to if that path is a readable regular
# file, +nil+ otherwise.
#
# An absolute +form+ is used as given; any other value is expanded with
# File.expand_path (i.e. relative to the current working directory).
#
# +form+ may just as well be free text rather than a path (tokenize passes
# its input through this method to decide between the two), so text that
# cannot be interpreted as a path at all -- e.g. a leading "~name" that is
# not a known user, or an embedded NUL byte -- is reported as "not a file"
# (+nil+) instead of letting the ArgumentError escape.
def file?(form)
  file = Pathname.new(form).absolute? ? form : File.expand_path(form)
  file if File.file?(file) && File.readable?(file)
rescue ArgumentError
  # Raised by Pathname.new / File.expand_path / File.file? for strings that
  # are not valid paths; such a form cannot name a file.
  nil
end
new(form, tokens = nil)
click to toggle source
Calls superclass method
# File lib/perseus_match/token_set.rb, line 191
#
# Builds a token set for +form+.
#
# If +tokens+ is nil (or false) they are obtained from
# self.class.tokenize(form); otherwise the given +tokens+ are handed to
# the superclass constructor unchanged. Afterwards +form+ is kept in @form
# and @tokens holds the result of +to_a+ on the new object.
def initialize(form, tokens = nil)
  tokens ||= self.class.tokenize(form)
  super(tokens)

  @form, @tokens = form, to_a
end
tokenize(form, unknowns = false)
click to toggle source
# File lib/perseus_match/token_set.rb, line 98
#
# Returns the token set for +form+, computing it on first use.
#
# NOTE: +form+ is downcased *in place* (String#downcase!), so the caller's
# string is modified.
#
# Lookup order:
# 1. If a token cache (@tokens) already exists, answer from it.
# 2. Otherwise, if the tokens file (ENV['PM_TOKENS_FILE'], default
#    'perseus.tokens') is readable, parse it into @_tokens and answer from
#    the cache, which stays in place for later calls.
# 3. Otherwise run lingo.rb inside LINGO_BASE with a temporary YAML config
#    built from LINGO_CONFIG. The input is the content of the file +form+
#    names (see file?) or else +form+ itself. The output is parsed into
#    @_tokens and, if ENV['PM_KEEP_TOKENS'] is set, also written to a file:
#    the tokens file when the value is 1/y/yes/true (case-insensitive),
#    otherwise the path given by the value.
#
# In case 3 the result depends on the input: for a plain-text +form+ its
# token set is returned and the cache is dropped again; for a file +form+
# the cache is kept and +nil+ is returned.
#
# +unknowns+ is passed through to parse.
#
# Raises RuntimeError if neither a tokens file nor a Lingo installation
# (LINGO_FOUND) is available.
def tokenize(form, unknowns = false)
  form.downcase!

  @tokens ||= nil  # touch the ivar so an unset cache reads as nil
  return @tokens[form] if @tokens

  @_tokens = {}
  @tokens  = Hash.new { |cache, key| cache[key] = new(key, @_tokens[key] || []) }

  tokens_path = ENV['PM_TOKENS_FILE'] || 'perseus.tokens'

  if File.readable?(tokens_path)
    File.open(tokens_path) { |io| parse(io, unknowns, @_tokens) }
    return @tokens[form]
  end

  raise "Lingo installation not found at #{LINGO_BASE}" unless LINGO_FOUND

  # NOTE(review): +config+ is the value of the block form of Tempfile.open,
  # i.e. whatever YAML.dump returns here; the code below needs it to respond
  # to #path and #unlink.
  config = Tempfile.open(['perseus_match_lingo', '.cfg']) do |tmp|
    YAML.dump(LINGO_CONFIG, tmp)
  end

  input_file = file?(form)

  keep_path = ENV['PM_KEEP_TOKENS']
  if keep_path
    keep_path = File.expand_path(keep_path =~ /\A(?:1|y(?:es)?|true)\z/i ? tokens_path : keep_path)
  end

  begin
    Dir.chdir(LINGO_BASE) do
      # NOTE(review): Process.ruby is not core Ruby; presumably it spawns a
      # Ruby subprocess and yields its pipes -- confirm against its definition.
      Process.ruby('lingo.rb', '-c', config.path) do |_, input, output, _|
        if input_file
          File.foreach(input_file) { |line| input.puts line }
        else
          input.puts(form)
        end

        input.close_write
        raw = output.read

        File.open(keep_path, 'w') { |f| f.puts raw } if keep_path
        parse(raw, unknowns, @_tokens)
      end
    end
  ensure
    config.unlink
  end

  return if input_file

  # Plain-text form: fetch its tokens, then drop the cache again.
  result  = @tokens[form]
  @tokens = nil
  result
end
Private Class Methods
parse(output, unknowns = false, tokens = {})
click to toggle source
# File lib/perseus_match/token_set.rb, line 153
#
# Reads +output+ line by line (anything responding to +each_line+) and
# collects the tokens found per word into the +tokens+ hash, which is
# also the return value.
#
# Two kinds of lines are recognised:
# * <tt><word = [(a) (b) ...]></tt> -- every parenthesised entry (minus a
#   trailing "+") becomes a Token for +word+;
# * <tt><word></tt> or <tt>:word:</tt> -- a single Token built from the
#   downcased text becomes the only token for +word+.
#
# The word used as hash key has Token::WC_RE removed and is downcased.
# An existing entry for a word is never overwritten.
#
# +unknowns+:: if truthy, words of the second kind whose token answers
#              true to +unk?+ are reported: appended to +unknowns+ when it
#              responds to <tt><<</tt>, otherwise printed via +warn+.
def parse(output, unknowns = false, tokens = {})
  strip_wc = lambda do |word|
    word.sub!(Token::WC_RE, '')
    word.downcase!
  end

  output.each_line do |line|
    if line =~ /<(.*?)\s=\s\[(.*)\]>/
      word     = Regexp.last_match(1)
      analysis = Regexp.last_match(2)
      strip_wc.call(word)

      tokens[word] ||= analysis.scan(/\((.*?)\+?\)/).flatten.map { |t| Token.new(t) }
    elsif line =~ /<(.*)>/ || line =~ /:(.*):/
      word  = Regexp.last_match(1)
      # The token is built from the raw (only downcased) text, before the
      # word itself is sanitized for use as key.
      token = Token.new(word.downcase)
      strip_wc.call(word)

      if unknowns && token.unk?
        if unknowns.respond_to?(:<<)
          unknowns << word
        else
          warn "UNK: #{word} [#{line.strip}]"
        end
      end

      tokens[word] ||= [token]
    end
  end

  tokens
end
Public Instance Methods
==(other)
click to toggle source
# File lib/perseus_match/token_set.rb, line 231
#
# Two token sets are equal when their +tokens+ are equal; +form+ is not
# taken into account (eql? additionally compares it).
#
# Comparing with an object that has no +tokens+ (nil, a String, a plain
# Array, ...) yields +false+ rather than raising NoMethodError, so the
# operator is safe to use from generic code such as include? or index.
def ==(other)
  other.respond_to?(:tokens) && tokens == other.tokens
end
disjoint?(other)
click to toggle source
# File lib/perseus_match/token_set.rb, line 206
#
# True when this set and +other+ have no form in common. Both form lists
# are flattened before they are intersected.
def disjoint?(other)
  own_forms   = forms.flatten
  their_forms = other.forms.flatten

  (own_forms & their_forms).empty?
end
distance(other)
click to toggle source
# File lib/perseus_match/token_set.rb, line 198
#
# Size of the union of both form lists minus the size of their
# intersection, i.e. how many distinct forms are not shared by this set
# and +other+.
def distance(other)
  mine   = forms
  theirs = other.forms

  combined = mine | theirs
  shared   = mine & theirs

  combined.size - shared.size
end
eql?(other)
click to toggle source
# File lib/perseus_match/token_set.rb, line 235
#
# Stricter than ==: the token sets must be == *and* have the same +form+.
#
# Objects that do not provide both +tokens+ and +form+ are never eql?;
# the check is made up front so that comparing with an unrelated object
# (as Hash does for colliding keys) returns +false+ instead of raising
# NoMethodError.
def eql?(other)
  return false unless other.respond_to?(:tokens) && other.respond_to?(:form)

  self == other && form == other.form
end
excl(wcs)
click to toggle source
# File lib/perseus_match/token_set.rb, line 218
#
# Returns a new set (same class, same +form+) without the tokens for
# which <tt>token.match?(wcs)</tt> is true.
def excl(wcs)
  remaining = reject { |token| token.match?(wcs) }
  self.class.new(form, remaining)
end
forms()
click to toggle source
# File lib/perseus_match/token_set.rb, line 202
#
# The +form+ of every token in this set, in order. The list is computed
# once and memoized in @forms.
def forms
  @forms ||= map(&:form)
end
incl(wcs)
click to toggle source
# File lib/perseus_match/token_set.rb, line 214
#
# Returns a new set (same class, same +form+) containing only the tokens
# for which <tt>token.match?(wcs)</tt> is true.
def incl(wcs)
  matching = select { |token| token.match?(wcs) }
  self.class.new(form, matching)
end
inclexcl(inclexcl = {})
click to toggle source
# File lib/perseus_match/token_set.rb, line 210
#
# Applies incl and then excl in one step.
#
# +inclexcl+ is a hash with the optional keys <tt>:incl</tt> (defaults to
# Token::ANY_WC when missing or nil) and <tt>:excl</tt> (passed to excl
# as-is, nil included).
def inclexcl(inclexcl = {})
  wanted   = inclexcl[:incl] || Token::ANY_WC
  included = incl(wanted)

  included.excl(inclexcl[:excl])
end
inspect()
click to toggle source
# File lib/perseus_match/token_set.rb, line 239
#
# The superclass' inspect output followed by the form in angle brackets.
def inspect
  inherited = super
  "#{inherited}<#{form}>"
end
Also aliased as: to_s
soundex()
click to toggle source
# File lib/perseus_match/token_set.rb, line 222
#
# Returns a set (same class, same +form+) whose tokens carry the Soundex
# code of each original token's form together with the original +wc+.
# The result is memoized in @soundex.
#
# Before encoding, diacritics are replaced (+replace_diacritics+) and the
# first run of non-word characters is removed. When Text::Soundex.soundex
# returns nil, the empty string is used as code.
#
# Raises RuntimeError (via ensure_soundex!) if Text::Soundex is not
# available -- this is checked on every call, memoized or not.
def soundex
  ensure_soundex!

  @soundex ||= begin
    encoded = map do |token|
      plain = token.form.replace_diacritics.sub(/\W+/, '')
      Token.new(Text::Soundex.soundex(plain) || '', token.wc)
    end

    self.class.new(form, encoded)
  end
end
Private Instance Methods
ensure_soundex!()
click to toggle source
# File lib/perseus_match/token_set.rb, line 247
#
# Raises RuntimeError unless Text::Soundex is defined; returns nil
# otherwise. The backtrace is set to caller(1), so the error is reported
# at the call site of the method that invoked this check.
def ensure_soundex!
  return if defined?(Text::Soundex)

  raise RuntimeError, "Soundex functionality not available", caller(1)
end