class EntityExtractor
Public Class Methods
new(input, extract_from, save_field)
click to toggle source
# File lib/entity_extractor.rb, line 5
#
# Sets up an extractor over a JSON document.
#
# input        - JSON text; it is parsed with JSON.parse and the parsed value
#                is later iterated with #each (presumably a JSON array of
#                objects -- confirm against callers).
# extract_from - stored as-is and handed to ExtractSetTerms for every item.
# save_field   - key under which extracted terms are read back from each
#                result by the getter methods.
#
# Raises whatever JSON.parse raises when input is not valid JSON.
def initialize(input, extract_from, save_field)
  @input = JSON.parse(input)
  @extract_from = extract_from
  @save_field = save_field
  # Results accumulate here, one entry per processed input item.
  @output = []
end
Public Instance Methods
extractSetTerms(to_extract, extract_term_fields, case_sensitive)
click to toggle source
Runs ExtractSetTerms on every input item and appends each result to the output
# File lib/entity_extractor.rb, line 14
#
# Builds one ExtractSetTerms per parsed input item and appends the value of
# its extractTerms call to @output.
#
# to_extract, extract_term_fields and case_sensitive are forwarded to
# ExtractSetTerms.new unchanged, together with the item, @extract_from and
# @save_field; their meaning is defined by ExtractSetTerms.
#
# Results are appended, so calling this method more than once accumulates
# entries in @output. Returns @input (the receiver of #each).
def extractSetTerms(to_extract, extract_term_fields, case_sensitive)
  @input.each do |item|
    extractor = ExtractSetTerms.new(item, @extract_from, to_extract,
                                    extract_term_fields, case_sensitive, @save_field)
    @output << extractor.extractTerms
  end
end
getAllOutput()
click to toggle source
Gets all results in the output as a pretty-printed JSON string
# File lib/entity_extractor.rb, line 22
#
# Serializes every accumulated result in @output, whether or not any terms
# were found for it.
#
# Returns a String produced by JSON.pretty_generate.
def getAllOutput
  JSON.pretty_generate(@output)
end
getOnlyMatching()
click to toggle source
Gets, as a pretty-printed JSON string, only the results for which terms were found/extracted
# File lib/entity_extractor.rb, line 27
#
# Serializes only the results whose @save_field entry is non-empty.
#
# A result with no @save_field entry at all (nil) is treated as having no
# matches and is left out, rather than raising NoMethodError on nil.empty?.
#
# Returns a String produced by JSON.pretty_generate.
def getOnlyMatching
  matches = @output.reject do |item|
    terms = item[@save_field]
    terms.nil? || terms.empty?
  end
  JSON.pretty_generate(matches)
end
getTermList()
click to toggle source
Gets a hash mapping each extracted term to the number of times it occurs, sorted from least to most frequent
# File lib/entity_extractor.rb, line 33
#
# Counts how often each term appears under @save_field across all results
# in @output.
#
# Results with no @save_field entry (nil) contribute nothing instead of
# raising NoMethodError.
#
# Returns a Hash of term => count, ordered by ascending count (least
# frequent first). The returned hash has no default value, so looking up an
# unknown term yields nil.
def getTermList
  counts = Hash.new(0)

  # Increment once per occurrence of a term.
  @output.each do |item|
    terms = item[@save_field]
    next if terms.nil?

    terms.each { |term| counts[term] += 1 }
  end

  # Rebuild as a plain hash in ascending order of count.
  Hash[counts.sort_by { |_term, count| count }]
end