class EntityExtractor

Public Class Methods

new(input, extract_from, save_field) click to toggle source
# File lib/entity_extractor.rb, line 5
# Sets up the extractor state.
#
# input        - JSON text; parsed once here with JSON.parse and kept in @input.
# extract_from - stored unchanged in @extract_from.
# save_field   - stored unchanged in @save_field.
#
# Raises whatever JSON.parse raises when input is not valid JSON.
def initialize(input, extract_from, save_field)
  @extract_from = extract_from
  @save_field = save_field
  @input = JSON.parse(input)

  # Results accumulate here; starts out empty.
  @output = []
end

Public Instance Methods

extractSetTerms(to_extract, extract_term_fields, case_sensitive) click to toggle source

Runs set-term extraction on every input item and appends each result to the output

# File lib/entity_extractor.rb, line 14
# Runs an ExtractSetTerms pass over every element of @input and appends each
# pass's extractTerms result to @output.
#
# to_extract, extract_term_fields and case_sensitive are handed to
# ExtractSetTerms.new unchanged, together with @extract_from and @save_field;
# their exact meaning is defined by ExtractSetTerms.
#
# Returns @input (the value of the each call).
def extractSetTerms(to_extract, extract_term_fields, case_sensitive)
  @input.each do |record|
    @output << ExtractSetTerms.new(
      record, @extract_from, to_extract, extract_term_fields, case_sensitive, @save_field
    ).extractTerms
  end
end
getAllOutput() click to toggle source

Returns all results in the output as a pretty-printed JSON string

# File lib/entity_extractor.rb, line 22
# Serializes everything accumulated in @output.
#
# Returns a String: @output rendered by JSON.pretty_generate.
def getAllOutput
  results = @output
  JSON.pretty_generate(results)
end
getOnlyMatching() click to toggle source

Returns, as a pretty-printed JSON string, only the results for which at least one term was extracted

# File lib/entity_extractor.rb, line 27
# Serializes only those entries of @output whose @save_field value is
# non-empty (as reported by empty?).
#
# Returns a String: the surviving entries rendered by JSON.pretty_generate.
def getOnlyMatching
  with_terms = @output.reject { |record| record[@save_field].empty? }
  JSON.pretty_generate(with_terms)
end
getTermList() click to toggle source

Returns a hash mapping each extracted term to the number of times it occurs, ordered from least to most frequent

# File lib/entity_extractor.rb, line 33
# Counts how many times each term appears across the @save_field values of
# all entries in @output.
#
# Returns a Hash of term => occurrence count, ordered by ascending count.
def getTermList
  # Default of 0 lets += 1 work for a term seen for the first time.
  tallies = Hash.new(0)

  @output.each do |record|
    record[@save_field].each { |term| tallies[term] += 1 }
  end

  # sort_by yields [term, count] pairs; to_h turns them back into a plain Hash.
  tallies.sort_by { |_term, count| count }.to_h
end