class LogStash::Filters::Fingerprint

Create consistent hashes (fingerprints) of one or more fields and store the result in a new field.

For example, this can be used to create consistent document IDs when inserting events into Elasticsearch, so that events in Logstash update existing documents instead of creating new ones.

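NOTE: The `key` option is only mandatory for the `IPV4_NETWORK` method, where it holds the subnet prefix length; without it the plugin raises a configuration error when it is registered. For the `SHA1`, `SHA256`, `SHA384`, `SHA512` and `MD5` methods the key is optional: when it is set a keyed hash (HMAC) is computed, otherwise a plain digest. The `UUID`, `PUNCTUATION`, `MURMUR3` and `MURMUR3_128` methods do not use the key.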

NOTE: When the `method` option is set to `UUID` the result won't be a consistent hash but a random https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID]. To generate UUIDs, prefer the <<plugins-filters-uuid,uuid filter>>.

Constants

INTEGER_MAX_32BIT
INTEGER_MIN_32BIT

Public Class Methods

new(*params) click to toggle source
Calls superclass method
# File lib/logstash/filters/fingerprint.rb, line 107
# Builds the filter and makes sure a target field name is set.
#
# Every argument is handed on unchanged to the superclass constructor. If
# @target is still unset once the superclass returns, the default is looked up
# through ecs_select: 'fingerprint' under the :disabled entry and
# '[event][hash]' under the :v1 entry (presumably the ECS compatibility modes;
# confirm against the ecs_select helper).
def initialize(*params)
  super
  @target = ecs_select[disabled: 'fingerprint', v1: '[event][hash]'] unless @target
end

Public Instance Methods

filter(event) click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 145
# Computes the fingerprint for +event+ and writes it to the @target field.
#
# What gets written depends on @method:
# * :UUID        - a random UUID from SecureRandom; no source field is read.
# * :PUNCTUATION - for each source field present on the event (visited in
#                  sorted order), the field value with everything except
#                  punctuation and symbol characters removed.
# * anything else - the result of #fingerprint, applied either to one string
#                  assembled from several fields (when @concatenate_sources or
#                  @concatenate_all_fields is set) or to each source field
#                  that is present on the event.
#
# Whenever source fields are handled one at a time they all write to the same
# target, so the value produced by the last field present is the one that stays.
#
# Returns whatever #filter_matched returns for the event.
def filter(event)
  case @method
  when :UUID
    event.set(@target, SecureRandom.uuid)
  when :PUNCTUATION
    @source.sort.each do |name|
      next unless event.include?(name)
      # The unicode property classes are used on purpose: the POSIX class
      # ([[:punct:]]) left some unwanted characters (Symbols) unfiltered, and the
      # two forms are not 100% equivalent in JRuby. Kept for backwards compatibility.
      punctuation_only = event.get(name).gsub(/[^[\p{P}\p{S}]]/,'')
      event.set(@target, punctuation_only)
    end
  else
    if @concatenate_all_fields || @concatenate_sources
      # Collect [name, value] pairs first; "all fields" takes precedence over
      # the configured source list when both options are enabled.
      pairs = if @concatenate_all_fields
                deep_sort_hashes(event.to_hash).map { |name, value| [name, value] }
              else
                @source.sort.map { |name| [name, deep_sort_hashes(event.get(name))] }
              end

      to_string = ""
      pairs.each do |name, value|
        # Force encoding to UTF-8 to get around https://github.com/jruby/jruby/issues/6748
        to_string << "|#{name}|#{value}".force_encoding("UTF-8")
      end
      to_string << "|"

      @logger.debug("String built", :to_checksum => to_string) if @logger.debug?
      event.set(@target, fingerprint(to_string))
    else
      @source.each do |name|
        next unless event.include?(name)
        value = event.get(name)
        # Arrays are fingerprinted element by element, everything else as a whole.
        hashed = if value.is_a?(Array)
                   value.map { |element| fingerprint(deep_sort_hashes(element)) }
                 else
                   fingerprint(deep_sort_hashes(value))
                 end
        event.set(@target, hashed)
      end
    end
  end
  filter_matched(event)
end
register() click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 112
# Prepares the filter for use: normalises @method to a symbol and binds
# #fingerprint (on this instance's singleton class) to the implementation that
# matches the configured method.
#
# :UUID and :PUNCTUATION need no #fingerprint implementation, so nothing is
# bound for them. Any method not listed explicitly is treated as a digest name
# and handed to #select_digest.
#
# Raises LogStash::ConfigurationError when the method is :IPV4_NETWORK and no
# key was configured.
def register
  # convert to symbol for faster comparisons
  @method = @method.to_sym

  # work out which private method should back #fingerprint
  implementation =
    case @method
    when :UUID, :PUNCTUATION
      nil
    when :IPV4_NETWORK
      if @key.nil?
        raise LogStash::ConfigurationError, I18n.t(
          "logstash.runner.configuration.invalid_plugin_register",
          :plugin => "filter",
          :type => "fingerprint",
          :error => "Key value is empty. please fill in a subnet prefix length"
        )
      end
      :fingerprint_ipv4_network
    when :MURMUR3
      :fingerprint_murmur3
    when :MURMUR3_128
      :fingerprint_murmur3_128
    else
      # Resolve the OpenSSL digest class right away, so that the lookup does not
      # happen for the first time inside #fingerprint_openssl while several
      # threads are running.
      # https://github.com/logstash-plugins/logstash-filter-fingerprint/issues/75
      select_digest(@method)
      :fingerprint_openssl
    end

  singleton_class.send(:alias_method, :fingerprint, implementation) if implementation
end

Private Instance Methods

deep_sort_hashes(object) click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 189
# Returns a copy of +object+ in which every Hash, at any nesting depth, has its
# entries inserted in sorted key order.
#
# Arrays keep their element order but have each element processed recursively.
# Any other object is returned as is (the very same instance). The input
# structure itself is not modified.
def deep_sort_hashes(object)
  case object
  when Array
    object.map { |item| deep_sort_hashes(item) }
  when Hash
    # Hash#sort yields [key, value] pairs ordered by key; re-inserting them in
    # that order produces a hash that iterates in sorted order.
    object.sort.each_with_object({}) do |(key, value), ordered|
      ordered[key] = deep_sort_hashes(value)
    end
  else
    object
  end
end
fingerprint_ipv4_network(ip_string) click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 204
# Reduces an IP address to its network address.
#
# +ip_string+ is parsed with IPAddr and masked with the prefix length taken from
# the configured key (@key.value, converted with to_i). The masked address is
# returned as a UTF-8 string.
def fingerprint_ipv4_network(ip_string)
  address = IPAddr.new(ip_string)
  prefix_length = @key.value.to_i
  # in JRuby 1.7.11 outputs as US-ASCII
  address.mask(prefix_length).to_s.force_encoding(Encoding::UTF_8)
end
fingerprint_murmur3(value) click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 238
# Hashes +value+ with the MurmurHash3::V32 functions.
#
# Integers are passed to int64_hash as they are; every other value is converted
# with to_s and passed to str_hash. The result of that call is returned.
def fingerprint_murmur3(value)
  return MurmurHash3::V32.int64_hash(value) if value.is_a?(Integer)

  MurmurHash3::V32.str_hash(value.to_s)
end
fingerprint_murmur3_128(value) click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 247
# Hashes +value+ with the MurmurHash3::V128 functions (second argument 2 on
# every call) and returns the result as text.
#
# * Non-integers are converted with to_s and hashed with str_base64digest or
#   str_hexdigest, depending on @base64encode.
# * Integers within INTEGER_MIN_32BIT..INTEGER_MAX_32BIT go through int32_hash,
#   all other integers through int64_hash. The returned numbers are packed with
#   the "L*" directive and then rendered as Base64 (when @base64encode is set)
#   or as a hex string.
def fingerprint_murmur3_128(value)
  unless value.is_a?(Integer)
    text = value.to_s
    return @base64encode ? MurmurHash3::V128.str_base64digest(text, 2) : MurmurHash3::V128.str_hexdigest(text, 2)
  end

  words = if value.between?(INTEGER_MIN_32BIT, INTEGER_MAX_32BIT)
            MurmurHash3::V128.int32_hash(value, 2)
          else
            MurmurHash3::V128.int64_hash(value, 2)
          end
  packed = words.pack("L*")

  if @base64encode
    # pack("m") terminates non-empty output with a newline, which chomp! strips.
    [packed].pack("m").chomp!
  else
    packed.unpack("H*")[0]
  end
end
fingerprint_openssl(data) click to toggle source
# File lib/logstash/filters/fingerprint.rb, line 209
# Hashes +data+ (converted with to_s) using the OpenSSL digest selected for
# @method and returns the result as a UTF-8 string.
#
# Without a key the plain digest is produced; with a key an HMAC keyed with
# @key.value is produced instead. In both cases @base64encode chooses between
# Base64 and hexadecimal output.
def fingerprint_openssl(data)
  # OpenSSL::Digest instances aren't thread safe, so every pipeline worker
  # thread keeps its own instance in a thread-local variable. A pipeline may
  # hold several fingerprint filters, hence the filter id is part of the
  # variable name so that each filter gets a separate digest instance.
  slot = "digest-#{id}"
  digest = (Thread.current[slot] ||= select_digest(@method).new)
  payload = data.to_s

  encoded =
    if @key.nil?
      @base64encode ? digest.base64digest(payload) : digest.hexdigest(payload)
    elsif @base64encode
      Base64.strict_encode64(OpenSSL::HMAC.digest(digest, @key.value, payload))
    else
      OpenSSL::HMAC.hexdigest(digest, @key.value, payload)
    end

  # in JRuby 1.7.11 outputs as ASCII-8BIT
  encoded.force_encoding(Encoding::UTF_8)
end
select_digest(method) click to toggle source

Returns the OpenSSL digest class that corresponds to the given method.

# File lib/logstash/filters/fingerprint.rb, line 272
# Maps a method symbol to the OpenSSL digest class of the same name.
#
# Recognised symbols are :MD5, :SHA1, :SHA256, :SHA384 and :SHA512. Anything
# else raises LogStash::ConfigurationError.
def select_digest(method)
  case method
  when :MD5    then OpenSSL::Digest::MD5
  when :SHA1   then OpenSSL::Digest::SHA1
  when :SHA256 then OpenSSL::Digest::SHA256
  when :SHA384 then OpenSSL::Digest::SHA384
  when :SHA512 then OpenSSL::Digest::SHA512
  else
    # we really should never get here
    raise LogStash::ConfigurationError, "Unknown digest for method=#{method.to_s}"
  end
end