module PgHero::Methods::QueryStats

Public Instance Methods

capture_query_stats(raise_errors: false) click to toggle source

resetting query stats will reset across the entire Postgres instance this is problematic if multiple PgHero databases use the same Postgres instance

to get around this, we capture queries for every Postgres database before we reset query stats for the Postgres instance with the `capture_query_stats` option

# File lib/pghero/methods/query_stats.rb, line 129
def capture_query_stats(raise_errors: false)
  return if config["capture_query_stats"] && config["capture_query_stats"] != true

  # get all databases that use same query stats and build mapping
  mapping = {id => database_name}
  PgHero.databases.select { |_, d| d.config["capture_query_stats"] == id }.each do |_, d|
    mapping[d.id] = d.database_name
  end

  now = Time.now

  query_stats = {}
  mapping.each do |database_id, database_name|
    query_stats[database_id] = query_stats(limit: 1000000, database: database_name)
  end

  query_stats = query_stats.select { |_, v| v.any? }

  # nothing to do
  return if query_stats.empty?

  # use mapping, not query stats here
  # TODO add option for this, and make default in PgHero 3.0
  if false # mapping.size == 1 && server_version_num >= 120000
    query_stats.each do |db_id, db_query_stats|
      if reset_query_stats(database: mapping[db_id], raise_errors: raise_errors)
        insert_query_stats(db_id, db_query_stats, now)
      end
    end
  else
    if reset_query_stats(raise_errors: raise_errors)
      query_stats.each do |db_id, db_query_stats|
        insert_query_stats(db_id, db_query_stats, now)
      end
    end
  end
end
clean_query_stats() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 167
def clean_query_stats
  PgHero::QueryStats.where(database: id).where("captured_at < ?", 14.days.ago).delete_all
end
disable_query_stats() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 54
def disable_query_stats
  execute("DROP EXTENSION IF EXISTS pg_stat_statements")
  true
end
enable_query_stats() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 49
def enable_query_stats
  execute("CREATE EXTENSION IF NOT EXISTS pg_stat_statements")
  true
end
historical_query_stats_enabled?() click to toggle source

stackoverflow.com/questions/20582500/how-to-check-if-a-table-exists-in-a-given-schema

# File lib/pghero/methods/query_stats.rb, line 106
def historical_query_stats_enabled?
  # TODO use schema from config
  # make sure primary database is PostgreSQL first
  query_stats_table_exists? && capture_query_stats? && !missing_query_stats_columns.any?
end
missing_query_stats_columns() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 116
def missing_query_stats_columns
  %w(query_hash user) - PgHero::QueryStats.column_names
end
query_hash_stats(query_hash, user: nil) click to toggle source
# File lib/pghero/methods/query_stats.rb, line 176
      def query_hash_stats(query_hash, user: nil)
        if historical_query_stats_enabled? && supports_query_hash?
          start_at = 24.hours.ago
          select_all_stats <<-SQL
            SELECT
              captured_at,
              total_time / 1000 / 60 AS total_minutes,
              (total_time / calls) AS average_time,
              calls,
              (SELECT regexp_matches(query, '.*/\\*(.+?)\\*/'))[1] AS origin
            FROM
              pghero_query_stats
            WHERE
              database = #{quote(id)}
              AND captured_at >= #{quote(start_at)}
              AND query_hash = #{quote(query_hash)}
              #{user ? "AND \"user\" = #{quote(user)}" : ""}
            ORDER BY
              1 ASC
          SQL
        else
          raise NotEnabled, "Query hash stats not enabled"
        end
      end
query_stats(historical: false, start_at: nil, end_at: nil, min_average_time: nil, min_calls: nil, **options) click to toggle source
# File lib/pghero/methods/query_stats.rb, line 4
def query_stats(historical: false, start_at: nil, end_at: nil, min_average_time: nil, min_calls: nil, **options)
  current_query_stats = historical && end_at && end_at < Time.now ? [] : current_query_stats(**options)
  historical_query_stats = historical && historical_query_stats_enabled? ? historical_query_stats(start_at: start_at, end_at: end_at, **options) : []

  query_stats = combine_query_stats((current_query_stats + historical_query_stats).group_by { |q| [q[:query_hash], q[:user]] })
  query_stats = combine_query_stats(query_stats.group_by { |q| [normalize_query(q[:query]), q[:user]] })

  # add percentages
  all_queries_total_minutes = [current_query_stats, historical_query_stats].sum { |s| (s.first || {})[:all_queries_total_minutes] || 0 }
  query_stats.each do |query|
    query[:average_time] = query[:total_minutes] * 1000 * 60 / query[:calls]
    query[:total_percent] = query[:total_minutes] * 100.0 / all_queries_total_minutes
  end

  sort = options[:sort] || "total_minutes"
  query_stats = query_stats.sort_by { |q| -q[sort.to_sym] }.first(100)
  if min_average_time
    query_stats.reject! { |q| q[:average_time] < min_average_time }
  end
  if min_calls
    query_stats.reject! { |q| q[:calls] < min_calls }
  end
  query_stats
end
query_stats_available?() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 29
def query_stats_available?
  select_one("SELECT COUNT(*) AS count FROM pg_available_extensions WHERE name = 'pg_stat_statements'") > 0
end
query_stats_enabled?() click to toggle source

only cache if true

# File lib/pghero/methods/query_stats.rb, line 34
def query_stats_enabled?
  @query_stats_enabled ||= query_stats_readable?
end
query_stats_extension_enabled?() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 38
def query_stats_extension_enabled?
  select_one("SELECT COUNT(*) AS count FROM pg_extension WHERE extname = 'pg_stat_statements'") > 0
end
query_stats_readable?() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 42
def query_stats_readable?
  select_all("SELECT * FROM pg_stat_statements LIMIT 1")
  true
rescue ActiveRecord::StatementInvalid
  false
end
query_stats_table_exists?() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 112
def query_stats_table_exists?
  table_exists?("pghero_query_stats")
end
reset_instance_query_stats(database: nil, user: nil, query_hash: nil, raise_errors: false) click to toggle source

resets query stats for the entire instance it's possible to reset stats for a specific database, user or query hash in Postgres 12+

# File lib/pghero/methods/query_stats.rb, line 68
def reset_instance_query_stats(database: nil, user: nil, query_hash: nil, raise_errors: false)
  if database || user || query_hash
    raise PgHero::Error, "Requires PostgreSQL 12+" if server_version_num < 120000

    if database
      database_id = execute("SELECT oid FROM pg_database WHERE datname = #{quote(database)}").first.try(:[], "oid")
      raise PgHero::Error, "Database not found: #{database}" unless database_id
    else
      database_id = 0
    end

    if user
      user_id = execute("SELECT usesysid FROM pg_user WHERE usename = #{quote(user)}").first.try(:[], "usesysid")
      raise PgHero::Error, "User not found: #{user}" unless user_id
    else
      user_id = 0
    end

    if query_hash
      query_id = query_hash.to_i
      # may not be needed
      # but not intuitive that all query hashes are reset with 0
      raise PgHero::Error, "Invalid query hash: #{query_hash}" if query_id == 0
    else
      query_id = 0
    end

    execute("SELECT pg_stat_statements_reset(#{quote(user_id.to_i)}, #{quote(database_id.to_i)}, #{quote(query_id.to_i)})")
  else
    execute("SELECT pg_stat_statements_reset()")
  end
  true
rescue ActiveRecord::StatementInvalid => e
  raise e if raise_errors
  false
end
reset_query_stats(**options) click to toggle source

TODO scope by database in PgHero 3.0 (add database: database_name to options)

# File lib/pghero/methods/query_stats.rb, line 61
def reset_query_stats(**options)
  reset_instance_query_stats(**options)
end
slow_queries(query_stats: nil, **options) click to toggle source
# File lib/pghero/methods/query_stats.rb, line 171
def slow_queries(query_stats: nil, **options)
  query_stats ||= self.query_stats(options)
  query_stats.select { |q| q[:calls].to_i >= slow_query_calls.to_i && q[:average_time].to_f >= slow_query_ms.to_f }
end
supports_query_hash?() click to toggle source
# File lib/pghero/methods/query_stats.rb, line 120
def supports_query_hash?
  server_version_num >= 90400
end

Private Instance Methods

combine_query_stats(grouped_stats) click to toggle source
# File lib/pghero/methods/query_stats.rb, line 302
def combine_query_stats(grouped_stats)
  query_stats = []
  grouped_stats.each do |_, stats2|
    value = {
      query: (stats2.find { |s| s[:query] } || {})[:query],
      user: (stats2.find { |s| s[:user] } || {})[:user],
      query_hash: (stats2.find { |s| s[:query_hash] } || {})[:query_hash],
      total_minutes: stats2.sum { |s| s[:total_minutes] },
      calls: stats2.sum { |s| s[:calls] }.to_i,
      all_queries_total_minutes: stats2.sum { |s| s[:all_queries_total_minutes] }
    }
    value[:total_percent] = value[:total_minutes] * 100.0 / value[:all_queries_total_minutes]
    value[:explainable_query] = stats2.map { |s| s[:explainable_query] }.select { |q| q && explainable?(q) }.first
    query_stats << value
  end
  query_stats
end
current_query_stats(limit: nil, sort: nil, database: nil, query_hash: nil) click to toggle source

www.craigkerstiens.com/2013/01/10/more-on-postgres-performance/

# File lib/pghero/methods/query_stats.rb, line 204
      def current_query_stats(limit: nil, sort: nil, database: nil, query_hash: nil)
        if query_stats_enabled?
          limit ||= 100
          sort ||= "total_minutes"
          total_time = server_version_num >= 130000 ? "(total_plan_time + total_exec_time)" : "total_time"
          query = <<-SQL
            WITH query_stats AS (
              SELECT
                LEFT(query, 10000) AS query,
                #{supports_query_hash? ? "queryid" : "md5(query)"} AS query_hash,
                rolname AS user,
                (#{total_time} / 1000 / 60) AS total_minutes,
                (#{total_time} / calls) AS average_time,
                calls
              FROM
                pg_stat_statements
              INNER JOIN
                pg_database ON pg_database.oid = pg_stat_statements.dbid
              INNER JOIN
                pg_roles ON pg_roles.oid = pg_stat_statements.userid
              WHERE
                calls > 0 AND
                pg_database.datname = #{database ? quote(database) : "current_database()"}
                #{query_hash ? "AND queryid = #{quote(query_hash)}" : nil}
            )
            SELECT
              query,
              query_hash,
              query_stats.user,
              total_minutes,
              average_time,
              calls,
              total_minutes * 100.0 / (SELECT SUM(total_minutes) FROM query_stats) AS total_percent,
              (SELECT SUM(total_minutes) FROM query_stats) AS all_queries_total_minutes
            FROM
              query_stats
            ORDER BY
              #{quote_table_name(sort)} DESC
            LIMIT #{limit.to_i}
          SQL

          # we may be able to skip query_columns
          # in more recent versions of Postgres
          # as pg_stat_statements should be already normalized
          select_all(query, query_columns: [:query])
        else
          raise NotEnabled, "Query stats not enabled"
        end
      end
explainable?(query) click to toggle source
# File lib/pghero/methods/query_stats.rb, line 320
def explainable?(query)
  query =~ /select/i && !query.include?("?)") && !query.include?("= ?") && !query.include?("$1") && query !~ /limit \?/i
end
historical_query_stats(sort: nil, start_at: nil, end_at: nil, query_hash: nil) click to toggle source
# File lib/pghero/methods/query_stats.rb, line 254
      def historical_query_stats(sort: nil, start_at: nil, end_at: nil, query_hash: nil)
        if historical_query_stats_enabled?
          sort ||= "total_minutes"
          query = <<-SQL
            WITH query_stats AS (
              SELECT
                #{supports_query_hash? ? "query_hash" : "md5(query)"} AS query_hash,
                pghero_query_stats.user AS user,
                array_agg(LEFT(query, 10000) ORDER BY REPLACE(LEFT(query, 1000), '?', '!') COLLATE "C" ASC) AS query,
                (SUM(total_time) / 1000 / 60) AS total_minutes,
                (SUM(total_time) / SUM(calls)) AS average_time,
                SUM(calls) AS calls
              FROM
                pghero_query_stats
              WHERE
                database = #{quote(id)}
                #{supports_query_hash? ? "AND query_hash IS NOT NULL" : ""}
                #{start_at ? "AND captured_at >= #{quote(start_at)}" : ""}
                #{end_at ? "AND captured_at <= #{quote(end_at)}" : ""}
                #{query_hash ? "AND query_hash = #{quote(query_hash)}" : ""}
              GROUP BY
                1, 2
            )
            SELECT
              query_hash,
              query_stats.user,
              query[1] AS query,
              query[array_length(query, 1)] AS explainable_query,
              total_minutes,
              average_time,
              calls,
              total_minutes * 100.0 / (SELECT SUM(total_minutes) FROM query_stats) AS total_percent,
              (SELECT SUM(total_minutes) FROM query_stats) AS all_queries_total_minutes
            FROM
              query_stats
            ORDER BY
              #{quote_table_name(sort)} DESC
            LIMIT 100
          SQL

          # we can skip query_columns if all stored data is normalized
          # for now, assume it's not
          select_all_stats(query, query_columns: [:query, :explainable_query])
        else
          raise NotEnabled, "Historical query stats not enabled"
        end
      end
insert_query_stats(db_id, db_query_stats, now) click to toggle source
# File lib/pghero/methods/query_stats.rb, line 330
def insert_query_stats(db_id, db_query_stats, now)
  values =
    db_query_stats.map do |qs|
      [
        db_id,
        qs[:query],
        qs[:total_minutes] * 60 * 1000,
        qs[:calls],
        now,
        supports_query_hash? ? qs[:query_hash] : nil,
        qs[:user]
      ]
    end

  columns = %w[database query total_time calls captured_at query_hash user]
  insert_stats("pghero_query_stats", columns, values)
end
normalize_query(query) click to toggle source

removes comments combines ?, ?, ? => ?

# File lib/pghero/methods/query_stats.rb, line 326
def normalize_query(query)
  squish(query.to_s.gsub(/\?(, ?\?)+/, "?").gsub(/\/\*.+?\*\//, ""))
end