class Nameko::Mecab

This class provides a parse method for analyzing text with MeCab.

 require 'nameko'

 mecab = Nameko::Mecab.new
 mecab.parse("私以外私じゃないの")
# =>
[
  #<MecabNode:0x00007f8f51117348>,
  #<MecabNode:0x00007f8f51116d30>,
  #<MecabNode:0x00007f8f51115610>,
  #<MecabNode:0x00007f8f51115138>,
  #<MecabNode:0x00007f8f51123fa8>,
  #<MecabNode:0x00007f8f51123be8>
]

Public Class Methods

destroy(mecab) click to toggle source
# File lib/nameko/nameko.rb, line 29
# Builds the finalizer proc that releases a MeCab tagger handle.
#
# The proc closes over +mecab+ only; it is a class-level method, so it does
# not hold a reference to the instance that is being finalized.
#
# @param mecab the tagger handle to release (as returned by mecab_new2)
# @return [Proc] a proc that calls mecab_destroy on the handle when invoked
def self.destroy(mecab)
  # Deliberately a proc rather than a lambda: finalizers are invoked with the
  # object id as an argument, which a proc silently ignores.
  proc {
    # Was misspelled `mecab_destory`, so the finalizer raised instead of
    # freeing the tagger.
    mecab_destroy(mecab)
  }
end
new(option = '') click to toggle source

Initialize the mecab tagger with the given option.

Options can be specified in any of the following ways:

@example

mecab = Nameko::Mecab.new("-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
mecab = Nameko::Mecab.new(["-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
mecab = Nameko::Mecab.new(["-d", "/usr/local/lib/mecab/dic/mecab-ipadic-neologd"])
# File lib/nameko/nameko.rb, line 45
# Creates the MeCab tagger and registers its cleanup.
#
# @param option [String, Array<String>] MeCab command-line options; an Array
#   is joined with single spaces before being handed to mecab_new2
def initialize(option = '')
  option_string = option.is_a?(Array) ? option.join(' ') : option

  @mecab = mecab_new2(option_string)
  # The finalizer is built by the class-level helper from the handle alone.
  ObjectSpace.define_finalizer(self, Mecab.destroy(@mecab))
end

Public Instance Methods

parse(str) click to toggle source

Parses the given string with MeCab. @param [String] str Text to parse @return [Array<MecabNode>] Result of the MeCab parsing

@example

node = mecab.parse("私以外私じゃないの")[0]

node.surface # => "私"
node.feature #=> {:pos=>"名詞", :pos1=>"代名詞", :pos2=>"一般", :pos3=>"", :conjugation_form=>"", :conjugation=>"", :base=>"私", :yomi=>"ワタシ", :pronunciation=>"ワタシ"}
node.posid #=> 59
node.id #=> 1
# File lib/nameko/nameko.rb, line 65
# Runs MeCab over +str+ and collects the resulting nodes.
#
# Walks the node list starting at the node returned by mecab_sparse_tonode,
# following +next+ until a null node is reached. Nodes whose surface is empty
# are skipped (presumably the BOS/EOS sentinels -- confirm against MecabNode).
#
# @param str [String] text to analyze
# @return [Array<MecabNode>] the nodes with a non-empty surface, in order
def parse(str)
  tokens = []
  current = MecabNode.new(mecab_sparse_tonode(@mecab, str))

  until current.null?
    tokens << current unless current.surface.empty?
    current = current.next
  end

  tokens
end

Private Instance Methods

analyze(mecab_row) click to toggle source
# File lib/nameko/nameko.rb, line 83
# Converts raw MeCab text output into one hash per line.
#
# The input is split on newlines and lines equal to "EOS" are dropped. Each
# remaining line is matched as "surface<TAB>feature,feature,..."; a leading
# "*" on the pos1..conjugation fields is consumed by the pattern, so those
# fields come back as "" when MeCab printed "*". Captures that did not take
# part in the match (e.g. yomi/pronunciation on short feature lists) are nil.
#
# @param mecab_row [String] raw MeCab output
# @return [Array<Hash{Symbol => String, nil}, nil>] one entry per line; nil
#   for a line the pattern does not match
def analyze(mecab_row)
  rows = mecab_row.split("\n").reject { |line| line == "EOS" }

  rows.map do |row|
    captures = row.match(/
      ^
      (?<surface>[^\t]+)
      \t
      (?:
        (?<pos>[^,]+),
        \*?(?<pos1>[^,]*),
        \*?(?<pos2>[^,]*),
        \*?(?<pos3>[^,]*),
        \*?(?<conjugation_form>[^,]*),
        \*?(?<conjugation>[^,]*),
        (?<base>[^,]*)
        (?:
          ,(?<yomi>[^,]*)
          ,(?<pronunciation>[^,]*)
        )?
      )?
      /x)
    # A non-matching line yields nil in the result.
    next unless captures

    captures.named_captures.each_with_object({}) do |(name, value), fields|
      fields[name.to_sym] = value
    end
  end
end
fill_up(analysis) click to toggle source
# File lib/nameko/nameko.rb, line 108
# Supplies a reading for entries that have none but contain katakana.
#
# When an entry has no :yomi and its :surface contains at least one katakana
# character, both :yomi and :pronunciation are set to the surface. The
# pattern is unanchored, so a surface that is only partly katakana also
# qualifies. Entries are modified in place and the same objects are returned.
#
# @param analysis [Array<Hash>] entries with :surface, :yomi, :pronunciation
# @return [Array<Hash>] the same entries, in order
def fill_up(analysis)
  analysis.map do |entry|
    # Entries that already carry a reading are passed through untouched.
    next entry if entry[:yomi]

    surface = entry[:surface]
    next entry unless surface.match(/\p{katakana}+/)

    entry[:yomi] = surface
    entry[:pronunciation] = surface
    entry
  end
end