module AttrSimilar::SimilarityMatching
Public Class Methods
find_first_similar(scope, entity, threshold_or_thresholds, attributes)
click to toggle source
# File lib/attr_similar/similarity_matching.rb, line 3
#
# Returns the first record reachable through +scope+ that shares at least a
# threshold number of non-blank attribute values with +entity+, or +nil+
# when there is none.
#
# scope::      object responding to <tt>where</tt>; each result must be
#              enumerable via <tt>to_a</tt> into records responding to +id+
#              (presumably an ActiveRecord relation -- confirm with callers).
# entity::     object whose attribute values are compared; when its +id+ is
#              truthy, records with that id are excluded from +scope+.
# threshold_or_thresholds::
#              either a single threshold, or an Array indexed by
#              (number of non-blank attributes - 1).
# attributes:: attribute names read from +entity+ via +send+ and used as
#              +where+ keys.
#
# Raises ArgumentError when no threshold can be determined (for example a
# thresholds Array shorter than the number of non-blank attributes).
def self.find_first_similar(scope, entity, threshold_or_thresholds, attributes)
  # Only consider attributes on entity with non-blank values
  filtered_attributes = attributes.reject { |attribute| entity.send(attribute).blank? }
  return nil if filtered_attributes.empty?

  # Use single threshold or threshold determined by number of filtered attributes
  threshold =
    if threshold_or_thresholds.is_a?(Array)
      threshold_or_thresholds[filtered_attributes.size - 1]
    else
      threshold_or_thresholds
    end

  # Fail fast with a readable message instead of an opaque
  # "comparison of Integer with nil failed" after the lookups have run.
  if threshold.nil?
    raise ArgumentError,
          "no threshold available for #{filtered_attributes.size} non-blank attribute(s)"
  end

  scope = scope.where('id != ?', entity.id) if entity.id

  # One list of matching records per attribute; attributes without any match
  # are dropped. Each result is materialized exactly once.
  per_attribute_matches = filtered_attributes.map do |attribute|
    scope.where(attribute => entity.send(attribute)).to_a
  end.reject(&:empty?)

  # No similar entities if count of per-attribute lists is below threshold
  return nil if per_attribute_matches.size < threshold

  # Single pass: remember every candidate by id (first-seen order) and count
  # in how many per-attribute lists each id appears (at most once per list).
  candidates_by_id = {}
  match_counts = Hash.new(0)
  per_attribute_matches.each do |matches|
    matches.each { |candidate| candidates_by_id[candidate.id] = candidate }
    matches.map(&:id).uniq.each { |id| match_counts[id] += 1 }
  end

  # First candidate (in first-seen order) found in at least "threshold" lists
  _id, similar = candidates_by_id.find { |id, _candidate| match_counts[id] >= threshold }
  similar
end