class LicenseAuto::Similarity
Constants
- SIM_RATIO
Expected similarity ratio
Public Class Methods
new(license_content)
click to toggle source
# File lib/license_auto/license/similarity.rb, line 12
# Builds the TF-IDF similarity matrix between every available license
# template and the license text under test.
#
# license_content - String holding the license text to classify. It is
#                   re-encoded to UTF-8, replacing invalid and undefined
#                   characters.
#
# Sets @license_content, @license_template_documents (one document per
# template whose file exists, followed by the document under test) and
# @matrix (the model's similarity matrix over those documents).
def initialize(license_content)
  @license_content = license_content.encode('UTF-8', :invalid => :replace, :undef => :replace)

  # Resolve each template path exactly once. abs_filename_path logs and
  # returns nil for a template whose file is missing; compact drops those.
  template_paths = LICENSE_SORTED_FREQUENCY.map { |template_name|
    abs_filename_path(template_name)
  }.compact

  @license_template_documents = template_paths.map { |abs_file|
    TfIdfSimilarity::Document.new(File.read(abs_file))
  }

  # The text under test is appended last, so its index is always
  # @license_template_documents.count - 1.
  @license_template_documents.push(TfIdfSimilarity::Document.new(@license_content))

  model = TfIdfSimilarity::TfIdfModel.new(@license_template_documents)
  @matrix = model.similarity_matrix
end
Public Instance Methods
abs_filename_path(template_name)
click to toggle source
# File lib/license_auto/license/similarity.rb, line 31
# Resolves a license template name to the absolute path of its text file.
#
# template_name - base name of the template file, without the ".txt"
#                 extension.
#
# Returns the absolute path String when it names an existing regular file.
# Otherwise logs an info message and returns nil.
def abs_filename_path(template_name)
  # __FILE__ is used as the base, so the leading ".." steps from this file
  # to the directory that contains it.
  template_path = File.expand_path("../templates/#{template_name}.txt", __FILE__)
  return template_path if FileTest.file?(template_path)

  LicenseAuto.logger.info("License template file not exist: #{template_path} !")
  nil
end
most_license_sim()
click to toggle source
TODO: What will happen if all similarity ratios are 0.00?
# File lib/license_auto/license/similarity.rb, line 43
# Finds the license template most similar to the license text under test.
#
# The document under test is the last entry of @license_template_documents,
# so its row of @matrix holds its similarity to every template document.
#
# Returns a two-element Array [license_name, max_sim_ratio]. When no
# template document is available, returns [nil, 0.0].
def most_license_sim
  license_file_index = @license_template_documents.count - 1

  # Exclusive range: with zero templates an inclusive 0..-1 would select the
  # whole array and compare the document under test with itself.
  sim_ratios = (0...license_file_index).map { |index| @matrix[license_file_index, index] }
  return [nil, 0.0] if sim_ratios.empty?

  # Templates whose file is missing are skipped when the documents are built,
  # so matrix columns line up with the available templates only, not with
  # LICENSE_SORTED_FREQUENCY as a whole. abs_filename_path logs an info
  # message for each missing template it is asked about.
  template_names = LICENSE_SORTED_FREQUENCY.reject { |template_name|
    abs_filename_path(template_name).nil?
  }

  max_sim_ratio = sim_ratios.max
  license_name = template_names[sim_ratios.index(max_sim_ratio)]

  debug = "License: #{license_name}, Ratio: #{max_sim_ratio}, license_content:\n #{@license_content[0..70]}..."
  LicenseAuto.logger.debug(debug)

  [license_name, max_sim_ratio]
end