class LogStash::Filters::Fingerprint
Create consistent hashes (fingerprints) of one or more fields and store the result in a new field.
This can e.g. be used to create consistent document ids when inserting events into Elasticsearch, allowing events in Logstash to cause existing documents to be updated rather than new documents to be created.
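NOTE: The `IPV4_NETWORK` method requires the key to be set (it holds the subnet prefix length); otherwise the plugin raises a configuration error when it is registered. For the OpenSSL digest methods (`SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5`) the key is optional: when it is set a keyed HMAC is computed, otherwise a plain digest is computed. The `UUID`, `PUNCTUATION`, `MURMUR3` and `MURMUR3_128` methods do not use the key.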
NOTE: When the `method` option is set to `UUID` the result won't be a consistent hash but a random https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID]. To generate UUIDs, prefer the <<plugins-filters-uuid,uuid filter>>.
Constants
- INTEGER_MAX_32BIT
- INTEGER_MIN_32BIT
Public Class Methods
new(*params)
click to toggle source
Calls superclass method
# File lib/logstash/filters/fingerprint.rb, line 107
# Builds the filter by delegating to the superclass constructor, then fills in
# a default target field when none was configured.
#
# The default is looked up through +ecs_select+ (defined outside this method):
# 'fingerprint' for the +disabled+ entry and '[event][hash]' for the +v1+
# entry -- presumably keyed by the ECS compatibility mode; confirm against the
# mixin that provides +ecs_select+.
#
# params:: forwarded unchanged to the superclass constructor.
def initialize(*params)
  super
  # An explicitly configured target always wins over the default.
  return if @target

  @target = ecs_select[disabled: 'fingerprint', v1: '[event][hash]']
end
Public Instance Methods
filter(event)
click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 145
# Computes the fingerprint for +event+ according to <tt>@method</tt>, stores it
# in the <tt>@target</tt> field and then calls +filter_matched+.
#
# * +:UUID+ stores a random UUID.
# * +:PUNCTUATION+ stores the punctuation/symbol characters of each present
#   source field (fields are visited in sorted order, each one overwriting the
#   target).
# * Any other method hashes either one concatenated string (when
#   <tt>@concatenate_sources</tt> or <tt>@concatenate_all_fields</tt> is set)
#   or each present source field on its own; array values are hashed element
#   by element.
#
# event:: the event to read source fields from and write the target field to.
#
# Returns whatever +filter_matched+ returns.
def filter(event)
  if @method == :UUID
    event.set(@target, SecureRandom.uuid)
  elsif @method == :PUNCTUATION
    @source.sort.each do |name|
      next unless event.include?(name)

      # In order to keep some backwards compatibility we should use the unicode version
      # of the regexp because the POSIX one ([[:punct:]]) left some unwanted characters unfiltered (Symbols).
      # gsub(/[^[:punct:]]/,'') should be equivalent to gsub(/[^[\p{P}\p{S}]]/,''), but not 100% in JRuby.
      event.set(@target, event.get(name).gsub(/[^[\p{P}\p{S}]]/, ''))
    end
  elsif @concatenate_sources || @concatenate_all_fields
    # Collect the (name, value) pairs first, then serialise them in one pass.
    pairs =
      if @concatenate_all_fields
        deep_sort_hashes(event.to_hash)
      else
        @source.sort.map { |name| [name, deep_sort_hashes(event.get(name))] }
      end

    joined = ""
    pairs.each do |name, value|
      # Force encoding to UTF-8 to get around https://github.com/jruby/jruby/issues/6748
      joined << "|#{name}|#{value}".force_encoding("UTF-8")
    end
    joined << "|"

    @logger.debug("String built", :to_checksum => joined) if @logger.debug?
    event.set(@target, fingerprint(joined))
  else
    @source.each do |name|
      next unless event.include?(name)

      value = event.get(name)
      hashed =
        if value.is_a?(Array)
          # Arrays are fingerprinted element by element, yielding an array.
          value.map { |item| fingerprint(deep_sort_hashes(item)) }
        else
          fingerprint(deep_sort_hashes(value))
        end
      event.set(@target, hashed)
    end
  end

  filter_matched(event)
end
register()
click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 112
# Prepares the filter for use: normalises <tt>@method</tt> to a Symbol and
# binds +fingerprint+ on this instance to the implementation that matches it.
#
# +:UUID+ and +:PUNCTUATION+ need no +fingerprint+ implementation, so nothing
# is bound for them.
#
# Raises LogStash::ConfigurationError when the method is +:IPV4_NETWORK+ and
# no key is set; +select_digest+ may raise for an unknown digest method.
def register
  # convert to symbol for faster comparisons
  @method = @method.to_sym

  # Pick the name of the method that +fingerprint+ should point at.
  implementation =
    case @method
    when :IPV4_NETWORK
      if @key.nil?
        raise LogStash::ConfigurationError, I18n.t(
          "logstash.runner.configuration.invalid_plugin_register",
          :plugin => "filter",
          :type => "fingerprint",
          :error => "Key value is empty. please fill in a subnet prefix length"
        )
      end
      :fingerprint_ipv4_network
    when :MURMUR3 then :fingerprint_murmur3
    when :MURMUR3_128 then :fingerprint_murmur3_128
    when :UUID, :PUNCTUATION then nil
    else
      # force the resolution of OpenSSL class to avoid errors when loaded in multi-threaded
      # #fingerprint_openssl method to instantiate the appropriate digest class.
      # https://github.com/logstash-plugins/logstash-filter-fingerprint/issues/75
      select_digest(@method)
      :fingerprint_openssl
    end

  # Alias on the singleton class so the binding is per filter instance.
  singleton_class.send(:alias_method, :fingerprint, implementation) if implementation
end
Private Instance Methods
deep_sort_hashes(object)
click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 189
# Returns a copy of +object+ in which every Hash, at any nesting depth, has
# its entries inserted in sorted order, so that serialising the result is
# independent of the original insertion order.
#
# Arrays keep their element order but have each element processed
# recursively; any other value is returned as is. The input is not modified.
#
# object:: a Hash, an Array or any other value.
def deep_sort_hashes(object)
  case object
  when Array
    object.map { |item| deep_sort_hashes(item) }
  when Hash
    # Hash#sort yields [key, value] pairs ordered by key; rebuild the hash in
    # that order with recursively normalised values.
    object.sort.each_with_object({}) do |(key, value), ordered|
      ordered[key] = deep_sort_hashes(value)
    end
  else
    object
  end
end
fingerprint_ipv4_network(ip_string)
click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 204
# Reduces an IP address to its network address, using the integer value of
# <tt>@key</tt> as the prefix length passed to IPAddr#mask.
#
# ip_string:: the address to mask, in a form accepted by IPAddr.new.
#
# Returns the masked address as a UTF-8 String.
def fingerprint_ipv4_network(ip_string)
  address = IPAddr.new(ip_string)
  network = address.mask(@key.value.to_i)
  # in JRuby 1.7.11 outputs as US-ASCII
  network.to_s.force_encoding(Encoding::UTF_8)
end
fingerprint_murmur3(value)
click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 238
# Hashes +value+ with the 32-bit MurmurHash3 implementation.
#
# Integers are passed to MurmurHash3::V32.int64_hash directly; every other
# value is converted with +to_s+ and passed to MurmurHash3::V32.str_hash.
#
# value:: the value to hash.
#
# Returns whatever the selected MurmurHash3::V32 function returns.
def fingerprint_murmur3(value)
  return MurmurHash3::V32.int64_hash(value) if value.is_a?(Integer)

  MurmurHash3::V32.str_hash(value.to_s)
end
fingerprint_murmur3_128(value)
click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 247
# Hashes +value+ with the 128-bit MurmurHash3 implementation and renders the
# result as hex, or as Base64 when <tt>@base64encode</tt> is set.
#
# * Integers within [INTEGER_MIN_32BIT, INTEGER_MAX_32BIT] go through
#   MurmurHash3::V128.int32_hash, other Integers through int64_hash; the
#   returned words are packed with "L*" before being encoded.
# * Every other value is converted with +to_s+ and handed to the library's
#   str_hexdigest / str_base64digest.
#
# The constant 2 is passed as the second argument of every MurmurHash3 call
# (presumably the hash seed -- confirm against the murmurhash3 gem).
#
# value:: the value to hash.
#
# Returns the encoded digest as a String.
def fingerprint_murmur3_128(value)
  unless value.is_a?(Integer)
    text = value.to_s
    return MurmurHash3::V128.str_base64digest(text, 2) if @base64encode

    return MurmurHash3::V128.str_hexdigest(text, 2)
  end

  words =
    if value.between?(INTEGER_MIN_32BIT, INTEGER_MAX_32BIT)
      MurmurHash3::V128.int32_hash(value, 2)
    else
      MurmurHash3::V128.int64_hash(value, 2)
    end
  packed = words.pack("L*")

  # "m0" emits Base64 without the trailing newline, so no chomp! is needed
  # (chomp! returns nil when there is nothing to strip, which made the
  # previous chained form fragile).
  @base64encode ? [packed].pack("m0") : packed.unpack("H*").first
end
fingerprint_openssl(data)
click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 209
# Hashes +data+ with the OpenSSL digest selected by <tt>@method</tt>.
#
# Without a key a plain digest is produced; with a key an HMAC keyed by
# <tt>@key.value</tt> is produced. The result is Base64 when
# <tt>@base64encode</tt> is set and hex otherwise.
#
# data:: the value to hash; it is converted with +to_s+ first.
#
# Returns the digest as a UTF-8 String.
def fingerprint_openssl(data)
  # since OpenSSL::Digest instances aren't thread safe, we must ensure that
  # each pipeline worker thread gets its own instance.
  # Also, since a logstash pipeline may contain multiple fingerprint filters
  # we must include the id in the thread local variable name, so that we can
  # store multiple digest instances
  slot = "digest-#{id}"
  Thread.current[slot] ||= select_digest(@method).new
  digest = Thread.current[slot]

  payload = data.to_s
  encoded =
    if @key.nil?
      @base64encode ? digest.base64digest(payload) : digest.hexdigest(payload)
    elsif @base64encode
      Base64.strict_encode64(OpenSSL::HMAC.digest(digest, @key.value, payload))
    else
      OpenSSL::HMAC.hexdigest(digest, @key.value, payload)
    end

  # in JRuby 1.7.11 outputs as ASCII-8BIT
  encoded.force_encoding(Encoding::UTF_8)
end
select_digest(method)
click to toggle source
Returns the OpenSSL digest class that corresponds to the given method.
# File lib/logstash/filters/fingerprint.rb, line 272
# Maps a digest method name to the OpenSSL digest class that implements it.
#
# method:: one of +:SHA1+, +:SHA256+, +:SHA384+, +:SHA512+ or +:MD5+.
#
# Returns the matching OpenSSL::Digest class (the class itself, not an
# instance).
#
# Raises LogStash::ConfigurationError for any other value.
def select_digest(method)
  digest_classes = {
    :SHA1 => OpenSSL::Digest::SHA1,
    :SHA256 => OpenSSL::Digest::SHA256,
    :SHA384 => OpenSSL::Digest::SHA384,
    :SHA512 => OpenSSL::Digest::SHA512,
    :MD5 => OpenSSL::Digest::MD5
  }

  digest_classes.fetch(method) do
    # we really should never get here
    raise(LogStash::ConfigurationError, "Unknown digest for method=#{method.to_s}")
  end
end