module Linguakit

Constants

DEFAULT_COMMAND
DEFAULT_COMMAND_STR

Public Class Methods

get_phrases(item)
# File lib/linguakit_ruby.rb, line 90
# Resolves an item descriptor into its list of phrases.
#
# The descriptor is normalized with item_config first, so a missing :data
# falls back to "" and a missing :type falls back to :str.
#
# item - Hash-like object with optional :data and :type keys.
#
# Returns, depending on the normalized :type:
#   :str - items_to_array applied to keyword(data)
#   :arr - the data value itself
#   else - nil (no branch matches)
def get_phrases item
  config = item_config(item)
  case config[:type]
  when :str
    # Pass the normalized data, not the raw item[:data], so the default applies.
    items_to_array keyword(config[:data])
  when :arr
    config[:data]
  end
end
get_score(principal_items, secondary_items, **args)
# File lib/linguakit_ruby.rb, line 99
# Scores how well the secondary phrases are covered by the principal phrases.
#
# Each secondary phrase is fuzzy-matched against the principal phrases. The
# scores of matches at or above the threshold are summed, multiplied by 100
# and divided by the number of principal phrases.
#
# principal_items - item descriptor accepted by get_phrases.
# secondary_items - item descriptor accepted by get_phrases.
# args            - :score is the minimum match score to count (default 0.8).
#
# Returns a Float. Returns 0.0 when there are no principal phrases, instead
# of dividing by zero.
def get_score(principal_items, secondary_items, **args)
  threshold = args[:score] || 0.8
  principal_phrases = get_phrases principal_items
  secondary_phrases = get_phrases secondary_items
  return 0.0 if principal_phrases.empty?

  # Build the matcher once; it does not depend on the phrase being looked up.
  matcher = FuzzyMatch.new(principal_phrases)
  total = secondary_phrases.sum(0.0) do |phrase|
    match = matcher.find(phrase, { find_with_score: true })
    # match[1] is the score element of the find_with_score result.
    match && match[1] >= threshold ? match[1] : 0.0
  end
  (total * 100) / principal_phrases.length
end
item_config(item)
# File lib/linguakit_ruby.rb, line 83
# Normalizes an item descriptor into a Hash with exactly two keys.
#
# item - object responding to [] with :data and :type.
#
# Returns { data:, type: }, where a nil/false :data becomes "" and a
# nil/false :type becomes :str.
def item_config item
  data = item[:data] || ""
  type = item[:type] || :str
  { data: data, type: type }
end
items_to_array(items)
# File lib/linguakit_ruby.rb, line 72
# Collects the :phrase value of every entry, in order.
#
# items - enumerable of objects responding to [] with :phrase.
#
# Returns an Array with one element per entry (nil where :phrase is absent).
def items_to_array items
  items.each_with_object([]) { |entry, phrases| phrases << entry[:phrase] }
end
keyphrases(input, **args)
# File lib/linguakit_ruby.rb, line 29
# Runs the 'mwe' module of the external command over the given text and
# parses its tab-separated standard output.
#
# The text is first written to a file via str_to_file; the resulting path is
# what gets substituted into DEFAULT_COMMAND.
#
# input - text to analyse.
# args  - :lang language code (default 'es').
#         :opts option flag passed to the command (default '-chi').
#         Flags listed by the original author:
#           -chi  = chi-square co-occurrence measure
#           -log  = loglikelihood
#           -scp  = symmetrical conditional probability
#           -mi   = mutual information
#           -cooc = co-occurrence counting
#
# Returns an Array of { phrase:, rank:, composition: } Hashes, one per
# non-blank output line. Only stdout is read; stderr and the exit status are
# ignored, so a failed command yields [].
def keyphrases input, **args
  config = {
    module: 'mwe',
    input: str_to_file(input),
    lang: args[:lang] || 'es',
    options: args[:opts] || '-chi'
  }
  stdout, = Open3.capture3(DEFAULT_COMMAND % config)
  # Skip blank lines so they do not turn into entries with a nil phrase.
  rows = stdout.split("\n").reject { |line| line.strip.empty? }
  rows.map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end
keyword(input, **args)
# File lib/linguakit_ruby.rb, line 54
# Runs the 'key' module of the external command over the given text and
# parses its tab-separated standard output.
#
# The text is first written to a file via str_to_file; the resulting path is
# what gets substituted into DEFAULT_COMMAND.
#
# input - text to analyse.
# args  - :lang language code (default 'es').
#
# Returns an Array of { phrase:, rank:, composition: } Hashes, one per
# non-blank output line. Only stdout is read; stderr and the exit status are
# ignored, so a failed command yields [].
def keyword input, **args
  config = {
    module: 'key',
    input: str_to_file(input),
    lang: args[:lang] || 'es'
  }
  stdout, = Open3.capture3(DEFAULT_COMMAND % config)
  # Skip blank lines so they do not turn into entries with a nil phrase.
  rows = stdout.split("\n").reject { |line| line.strip.empty? }
  rows.map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end
sentiment(input, **args)
# File lib/linguakit_ruby.rb, line 13
# Runs the 'sent' module of the external command and parses the second and
# third tab-separated fields of its standard output.
#
# input - value substituted into the command as-is. Per the original
#         author's note, the '-s' option means the input is a string and not
#         a file.
# args  - :lang language code (default 'es').
#         :opts option flag; when it is exactly '-s' the command is built
#               from DEFAULT_COMMAND_STR, otherwise from DEFAULT_COMMAND.
#
# Returns { emotion:, point: }. When the output lacks the expected fields
# (for example, empty stdout) emotion is nil and point is 0.0 rather than
# raising NoMethodError. stderr and the exit status are ignored.
def sentiment input, **args
  opts = args[:opts]
  config = {
    module: 'sent',
    input: input,
    lang: args[:lang] || 'es',
    options: opts
  }
  command = opts == '-s' ? DEFAULT_COMMAND_STR : DEFAULT_COMMAND
  stdout, = Open3.capture3(command % config)
  _first, emotion, point = stdout.split("\t")
  {
    emotion: emotion,
    # Keep only the first line of the third field; a missing field gives 0.0.
    point: point.to_s.split("\n").first.to_f
  }
end
str_to_file(str)
# File lib/linguakit_ruby.rb, line 76
# Writes str to a new UTF-8 temporary file named data*.txt inside the "tmp"
# directory of the current working directory and returns the file's path.
#
# The "tmp" directory is created if it does not exist yet, and the file is
# closed even when the write fails.
#
# NOTE(review): only the path is returned, so nothing keeps the Tempfile
# object alive. Ruby's Tempfile documentation states the file is deleted when
# the object is garbage collected; a caller that uses the path much later
# may find the file already gone - confirm this is acceptable.
#
# str - content to write.
#
# Returns the String path of the written file.
def str_to_file str
  dir = "#{ Dir.pwd}/tmp"
  begin
    Dir.mkdir(dir) unless Dir.exist?(dir)
  rescue Errno::EEXIST
    # Another process created the directory in the meantime; that is fine.
  end
  file = Tempfile.new(['data', '.txt'], dir, encoding: 'utf-8')
  begin
    file.write str
  ensure
    file.close
  end
  file.path
end