class PerseusMatch::TokenSet

Attributes

form[R]
tokens[R]

Public Class Methods

file?(form) click to toggle source
    # File lib/perseus_match/token_set.rb
# Resolves +form+ to a file path and checks that it is usable.
#
# An absolute +form+ is used verbatim; anything else is expanded with
# File.expand_path. Returns the resulting path when it names an existing,
# readable regular file, and +nil+ otherwise.
def file?(form)
  path = form
  path = File.expand_path(form) unless Pathname.new(form).absolute?

  File.file?(path) && File.readable?(path) ? path : nil
end
new(form, tokens = nil) click to toggle source
Calls superclass method
    # File lib/perseus_match/token_set.rb
# Builds a token set for +form+.
#
# When +tokens+ is not given, the tokens are obtained from the class-level
# +tokenize+ method. The chosen tokens are handed to the superclass
# constructor; afterwards +form+ is stored and the current contents
# (+to_a+) are kept in <tt>@tokens</tt>.
def initialize(form, tokens = nil)
  tokens ||= self.class.tokenize(form)
  super(tokens)

  @form   = form
  @tokens = to_a
end
tokenize(form, unknowns = false) click to toggle source
    # File lib/perseus_match/token_set.rb
 98 def tokenize(form, unknowns = false)
 99   form.downcase!
100   return @tokens[form] if @tokens ||= nil
101 
102   @_tokens = Hash.new
103   @tokens  = Hash.new { |h, k| h[k] = new(k, @_tokens[k] || []) }
104 
105   tokens_file = ENV['PM_TOKENS_FILE'] || 'perseus.tokens'
106 
107   if File.readable?(tokens_file)
108     File.open(tokens_file) { |f| parse(f, unknowns, @_tokens) }
109     @tokens[form]
110   else
111     raise "Lingo installation not found at #{LINGO_BASE}" unless LINGO_FOUND
112 
113     cfg = Tempfile.open(['perseus_match_lingo', '.cfg']) { |t|
114       YAML.dump(LINGO_CONFIG, t)
115     }
116 
117     file = file?(form)
118 
119     if keep = ENV['PM_KEEP_TOKENS']
120       keep = File.expand_path(keep =~ /\A(?:1|y(?:es)?|true)\z/i ? tokens_file : keep)
121     end
122 
123     begin
124       Dir.chdir(LINGO_BASE) {
125         Process.ruby(*%W[lingo.rb -c #{cfg.path}]) { |_, i, o, _|
126           file ? File.foreach(file) { |line| i.puts line } : i.puts(form)
127 
128           i.close_write
129           tokens = o.read
130 
131           File.open(keep, 'w') { |f| f.puts tokens } if keep
132           parse(tokens, unknowns, @_tokens)
133         }
134       }
135     ensure
136       cfg.unlink
137     end
138 
139     unless file
140       tokens, @tokens = @tokens[form], nil
141       tokens
142     end
143   end
144 end

Private Class Methods

parse(output, unknowns = false, tokens = {}) click to toggle source
    # File lib/perseus_match/token_set.rb
# Reads analysis output line by line and fills +tokens+ (a Hash keyed by
# normalised form).
#
# Two kinds of lines are recognised; everything else is skipped:
#
# * <tt><form = [(a) (b+) ...]></tt> -- every parenthesised entry (minus an
#   optional trailing "+") becomes a Token stored under the form;
# * <tt><form></tt> or <tt>:form:</tt> -- a single Token is built from the
#   lower-cased text and stored under the form.
#
# Keys are normalised by removing the first match of Token::WC_RE and
# lower-casing. A key that is already present is never overwritten.
#
# When +unknowns+ is truthy and a token of the second kind reports +unk?+,
# the key is appended to +unknowns+ if it responds to <tt><<</tt>;
# otherwise a warning is printed.
#
# Returns +tokens+.
def parse(output, unknowns = false, tokens = {})
  output.each_line do |line|
    case line
    when /<(.*?)\s=\s\[(.*)\]>/
      key, analysis = $1, $2
      key.sub!(Token::WC_RE, '')
      key.downcase!

      tokens[key] ||= analysis.scan(/\((.*?)\+?\)/).flatten.map { |entry| Token.new(entry) }
    when /<(.*)>/, /:(.*):/
      key   = $1
      token = Token.new(key.downcase)  # built from the text before the key is normalised
      key.sub!(Token::WC_RE, '')
      key.downcase!

      report = unknowns && token.unk?
      if report && unknowns.respond_to?(:<<)
        unknowns << key
      elsif report
        warn "UNK: #{key} [#{line.strip}]"
      end

      tokens[key] ||= [token]
    end
  end

  tokens
end

Public Instance Methods

==(other) click to toggle source
    # File lib/perseus_match/token_set.rb
# Two token sets are equal when their +tokens+ are equal; the +form+ is not
# taken into account (see +eql?+ for the stricter comparison).
#
# Returns false -- rather than raising NoMethodError -- when +other+ does
# not expose +tokens+ (for example +nil+ or an unrelated object).
def ==(other)
  other.respond_to?(:tokens) && tokens == other.tokens
end
disjoint?(other) click to toggle source
    # File lib/perseus_match/token_set.rb
# True when this set and +other+ have no form in common. Both form lists
# are flattened before they are intersected.
def disjoint?(other)
  mine   = forms.flatten
  theirs = other.forms.flatten

  (mine & theirs).empty?
end
distance(other) click to toggle source
    # File lib/perseus_match/token_set.rb
# Number of forms that occur in exactly one of the two sets: the size of
# the union of both form lists minus the size of their intersection.
def distance(other)
  theirs = other.forms
  union  = forms | theirs
  common = forms & theirs

  union.size - common.size
end
eql?(other) click to toggle source
    # File lib/perseus_match/token_set.rb
# Stricter than <tt>==</tt>: the sets must compare equal with <tt>==</tt>
# and, in addition, have equal +form+s.
def eql?(other)
  same_tokens = self == other
  same_tokens && form == other.form
end
excl(wcs) click to toggle source
    # File lib/perseus_match/token_set.rb
# Returns a new set of the same class and +form+ that contains only the
# tokens for which <tt>token.match?(wcs)</tt> is false.
def excl(wcs)
  remaining = reject { |token| token.match?(wcs) }
  self.class.new(form, remaining)
end
forms() click to toggle source
    # File lib/perseus_match/token_set.rb
# The +form+ of every token in this set, in order. Computed on first use
# and memoised in <tt>@forms</tt>.
def forms
  @forms ||= map(&:form)
end
incl(wcs) click to toggle source
    # File lib/perseus_match/token_set.rb
# Returns a new set of the same class and +form+ that contains only the
# tokens for which <tt>token.match?(wcs)</tt> is true.
def incl(wcs)
  matching = select { |token| token.match?(wcs) }
  self.class.new(form, matching)
end
inclexcl(inclexcl = {}) click to toggle source
    # File lib/perseus_match/token_set.rb
# Applies +incl+ and then +excl+ in one step.
#
# +inclexcl+ is a Hash: <tt>:incl</tt> is passed to +incl+ (falling back to
# Token::ANY_WC when absent or falsy), and <tt>:excl</tt> is passed as-is
# to +excl+ on the result.
def inclexcl(inclexcl = {})
  included = incl(inclexcl[:incl] || Token::ANY_WC)
  included.excl(inclexcl[:excl])
end
inspect() click to toggle source
    # File lib/perseus_match/token_set.rb
# The superclass's +inspect+ output followed by the form in angle
# brackets.
def inspect
  format('%s<%s>', super, form)
end
Also aliased as: to_s
soundex() click to toggle source
    # File lib/perseus_match/token_set.rb
# Returns a set of the same class and +form+ in which every token's form
# is replaced by its Soundex code (an empty string when
# Text::Soundex.soundex returns nothing); each token keeps its +wc+.
#
# Before encoding, diacritics are replaced and the first run of non-word
# characters is removed from the token's form. The result is memoised in
# <tt>@soundex</tt>. Availability of Text::Soundex is checked on every
# call via +ensure_soundex!+.
def soundex
  ensure_soundex!

  @soundex ||= begin
    encoded = map do |token|
      plain = token.form.replace_diacritics.sub(/\W+/, '')
      Token.new(Text::Soundex.soundex(plain) || '', token.wc)
    end

    self.class.new(form, encoded)
  end
end
to_s()
Alias for: inspect

Private Instance Methods

ensure_soundex!() click to toggle source
    # File lib/perseus_match/token_set.rb
# Raises RuntimeError unless Text::Soundex is defined. The backtrace is set
# to +caller(1)+, i.e. it starts at the method that invoked this check.
def ensure_soundex!
  return if defined?(Text::Soundex)

  raise RuntimeError, "Soundex functionality not available", caller(1)
end
251 end