postgres snippets

Postgres Performance Tuning

Tagged performance, postgres, database, tuning  Languages sql

Tuning memory

To tune memory, it helps to know how much of the data is served from memory. This query reports the cache hit ratio for user tables:

SELECT 
  sum(heap_blks_read) as heap_read,
  sum(heap_blks_hit)  as heap_hit,
  sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
FROM 
  pg_statio_user_tables;

If the cache hit ratio is below 99%, try giving Postgres more memory.
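
A common starting point is shared_buffers at roughly 25% of the machine's RAM. A sketch (the value below is only an illustration):

-- Check the current setting
SHOW shared_buffers;

-- Raise it; a restart is required before the change takes effect
ALTER SYSTEM SET shared_buffers = '4GB';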

Finding slow queries

Logging slow queries with log_min_duration_statement is one option. Telling Postgres to collect per-query statistics with pg_stat_statements is easier:

CREATE EXTENSION pg_stat_statements;

-- View slowest queries
SELECT 
  (total_time / 1000 / 60) as total_minutes, 
  (total_time/calls) as average_time, 
  query 
FROM pg_stat_statements 
ORDER BY 1 DESC 
LIMIT 100;
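
Note that pg_stat_statements only collects data when the module is preloaded, which requires a server restart; also, on Postgres 13 and later the total_time column is split into total_plan_time and total_exec_time. A sketch of the preload step:

-- Takes effect after the server is restarted
ALTER SYSTEM SET shared_preload_libraries = 'pg_stat_statements';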

Finding tables that are not indexed

SELECT 
  relname, 
  100 * idx_scan / (seq_scan + idx_scan) percent_of_times_index_used, 
  n_live_tup rows_in_table
FROM 
  pg_stat_user_tables
WHERE 
    seq_scan + idx_scan > 0 
ORDER BY 
  n_live_tup DESC;
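
A large table with a low percentage of index use is usually missing an index on the columns used in WHERE clauses. A sketch, with a hypothetical table and column:

-- CONCURRENTLY builds the index without blocking writes to the table
CREATE INDEX CONCURRENTLY people_email_idx ON people (email);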


Multi-tenancy in Ecto and Phoenix

Tagged ecto, elixir, multi-tenancy, postgres, phoenix  Languages bash, elixir

Creating a new tenant

A new tenant requires a namespace, which is a schema in Postgres, and a prefix in Ecto:

$ psql -U postgres database_x
> create schema aktagon; 
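> -- Optional: verify that the schema was created
> select schema_name from information_schema.schemata where schema_name = 'aktagon';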

Querying data

import Ecto.Query
email = "[email protected]"
q = from(m in User, where: m.email == ^email)
Repo.all(%{q | prefix: "aktagon"})

Documentation: https://hexdocs.pm/ecto/Ecto.Query.html#module-query-prefix
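
Setting the prefix simply schema-qualifies the table name in the generated SQL. Roughly (assuming the User schema maps to a users table; the selected columns depend on the schema):

SELECT u0."id", u0."email" FROM "aktagon"."users" AS u0 WHERE (u0."email" = $1);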

Inserting data

Repo.insert(
  Ecto.put_meta(
   %User{ email: "[email protected]" },
   prefix: "aktagon"
  )
)

Migrations

$ mix ecto.migrate --prefix "aktagon"
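
Each prefix should end up with its own schema_migrations table inside that schema, which gives a quick way to check which migrations have run for a tenant:

select version from aktagon.schema_migrations order by version;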

Notes

  • (KeyError) key :__meta__ not found

I got this error when passing a changeset to Ecto.put_meta instead of a User struct.

Go and Postgres Example

Tagged example, golang, polling, postgres  Languages bash, go

package main

import (
    "database/sql"
    _ "github.com/lib/pq"
    "log"
    "time"
)

func pollChanges() {
    var id int
    var name string
    rows, err := db.Query("SELECT id, name FROM people")
    if err != nil {
        log.Fatal(err)
    }
    defer rows.Close()
    for rows.Next() {
        err := rows.Scan(&id, &name)
        if err != nil {
            log.Fatal(err)
        }
        log.Println(id, name)
    }
    err = rows.Err()
    if err != nil {
        log.Fatal(err)
    }
}

var db *sql.DB

func main() {
    var err error
    db, err = sql.Open("postgres", "")
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()
    for {
        time.Sleep(1000 * time.Millisecond)
        log.Println("Polling for changes...")
        pollChanges()
    }
}

Run:

PGUSER= PGPASSWORD= PGDATABASE=db_xyz PGSSLMODE=disable ./go-app

Notes

  • Using question marks instead of, e.g., $1 will result in the following error:

    syntax error at or near ","

  • Scan requires a pointer. Forgetting to add an ampersand (Scan(id) vs Scan(&id)) will result in the following error:

    Scan error on column index 0: destination not a pointer

Storing, querying, and indexing XML with Postgres

Tagged postgres, xbrl, xml, xpath  Languages xml, sql

Create table

CREATE TABLE xbrl_reports
(
  id serial primary key NOT NULL,
  doc xml NOT NULL,
  cik varchar(255) NOT NULL
);

Create function for importing XML

-- http://tapoueh.org/blog/2009/02/05-importing-xml-content-from-file
create or replace function xml_import(filename text)
  returns xml
  volatile
  language plpgsql as
$f$
    declare
        content bytea;
        loid oid;
        lfd integer;
        lsize integer;
    begin
        loid := lo_import(filename);
        lfd := lo_open(loid,262144);
        lsize := lo_lseek(lfd,0,2);
        perform lo_lseek(lfd,0,0);
        content := loread(lfd,lsize);
        perform lo_close(lfd);
        perform lo_unlink(loid);
 
        return xmlparse(document convert_from(content,'UTF8'));
    end;
$f$;

Import XML

-- Import XML file into Postgres
insert into xbrl_reports(doc, cik) values(xml_import('/Users/Christian/Downloads/2016q3/ibm-20160930.xml'), '1');

XML namespaces declared by the document (excerpt):

<xbrl
  xmlns="http://www.xbrl.org/2003/instance"
  xmlns:dei="http://xbrl.sec.gov/dei/2014-01-31"

Query data

-- Check if dei:TradingSymbol exists => t
SELECT xpath_exists('//xbrl:xbrl/dei:TradingSymbol', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}') from xbrl_reports;

-- Extract dei:TradingSymbol by declaring dei namespace => {IBM}
SELECT xpath('//xbrl:xbrl/dei:TradingSymbol/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}') from xbrl_reports;

-- Extract dei:TradingSymbol by adding ((...)[1]::text) => IBM
SELECT ((xpath('//xbrl:xbrl/dei:TradingSymbol/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}'))[1]::text) from xbrl_reports;

Index data

-- Create index for faster lookups
create index xbrl_reports_ticker_idx on xbrl_reports using btree ((( xpath('//xbrl:xbrl/dei:TradingSymbol/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}') )[1]::text)); 
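
The planner uses this expression index only when a query references the same expression, for example:

SELECT id, cik
FROM xbrl_reports
WHERE ((xpath('//xbrl:xbrl/dei:TradingSymbol/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}'))[1]::text) = 'IBM';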

Materialized views for performance

CREATE MATERIALIZED VIEW company_reports AS SELECT
  ((xpath('//xbrl:xbrl/dei:TradingSymbol/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}'))[1]::text) as ticker,
  ((xpath('//xbrl:xbrl/dei:EntityRegistrantName/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}'))[1]::text) as name,
  ((xpath('//xbrl:xbrl/dei:DocumentType/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}'))[1]::text) as document_type,
  ((xpath('//xbrl:xbrl/dei:DocumentPeriodEndDate/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}'))[1]::text) as quarter,
  ((xpath('//xbrl:xbrl/dei:EntityCommonStockSharesOutstanding/text()', doc, '{{xbrl,http://www.xbrl.org/2003/instance},{dei,http://xbrl.sec.gov/dei/2014-01-31}}'))[1]::text) as shares_outstanding
FROM xbrl_reports;
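
The materialized view is a snapshot and is not updated automatically; refresh it after importing new reports:

REFRESH MATERIALIZED VIEW company_reports;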

Hello Postgres and REST in Crystal

Tagged crystal, postgres, rest  Languages bash, crystal, yaml

NOTE: This does not work with Crystal 0.25.2.

Install Crystal

brew install crystal-lang
mkdir projects/hello
cd projects/hello
shards init

Set up dependencies

Add the dependencies to shard.yml:

dependencies:
  pg:
    github: will/crystal-pg
    version: "~> 0.5"
  kemal:
    github: sdogruyol/kemal
    version: "~> 0.16.1"

Then install them:

shards

Write a simple REST API

require "kemal"
require "json"
require "pg"

PG_URL = "postgres://[email protected]:5432/xxx"
DB     = PG.connect PG_URL

get "/" do |env|
  env.response.content_type = "application/json"
  users = DB.exec("SELECT * FROM users")
  users.to_hash.map do |user|
    {first_name: user["first_name"].as(String), last_name: user["last_name"].as(String)}
  end.to_json
end

Kemal.run(4567)
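
The example assumes a users table with at least first_name and last_name columns. A minimal sketch of such a table:

CREATE TABLE users (
  id serial PRIMARY KEY,
  first_name varchar(255) NOT NULL,
  last_name varchar(255) NOT NULL
);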

Database diff

Tagged postgres, database, diff  Languages yaml, bash, ruby

This script will connect to two databases, named old and new, and print:

  • the names of the tables that cannot be found in the new database
  • the difference in the number of rows per table, when the counts differ

db_diff.rb:

#
# Postgres database diff script. Prints the names of missing tables and the
# difference in the number of rows per table.
#
require 'sequel'
require 'pg'
require 'yaml'
require 'json'
require 'pry'

list_tables_sql = "SELECT tablename from pg_tables where schemaname = 'public';"
OLD_DB = Sequel.connect(YAML.load(File.read('old_database.yml')))
NEW_DB = Sequel.connect(YAML.load(File.read('new_database.yml')))
OLD_TABLES = OLD_DB[list_tables_sql].map{ |x| x.fetch(:tablename) }
NEW_TABLES = NEW_DB[list_tables_sql].map{ |x| x.fetch(:tablename) }

# Compare tables
def diff_tables
  OLD_TABLES - NEW_TABLES
end

# Compare row count
def diff_row_count
  OLD_TABLES.sort.reduce({}) do |hash, table|
    sql = "SELECT count(*) FROM %{table}"
    # Sequel's count method does not work.
    diff = OLD_DB[sql % {table: table}].first[:count].to_i - NEW_DB[sql % {table: table}].first[:count].to_i
    hash[table] = diff if diff != 0
    hash
  end
end

puts JSON.pretty_generate(tables: diff_tables, rows: diff_row_count)

Gemfile:

source 'https://rubygems.org'

gem 'sequel'
gem 'pg'
gem 'pry'

new_database.yml and old_database.yml:

adapter: postgres
host: localhost
port: 5432
encoding: unicode
database: x_production
username: x
password: x

To run the script:

gem install bundler
bundle
bundle exec ruby db_diff.rb

Other migration and diff tools

migra is a schema diff tool for PostgreSQL. Use it to compare database schemas or autogenerate migration scripts.

Given two database connections, it outputs a file that represents the difference between their schemas. If you run the output file against the from database, it will end up with the same schema as the to database.

Compares the PostgreSQL schema between two databases and generates SQL statements that can be run manually against the second database to make their schemas match.

How to use LATERAL as a for loop in Postgres

Tagged postgres, loop, lateral  Languages sql

This example shows how to use LATERAL as a for loop in Postgres: for each month, the query runs a lateral subquery that returns the number of courses active during that month.

-- A list of months and their start and end timestamps
WITH months AS (
  SELECT start, start + (interval '1' month) - (interval '1' second) AS "end" FROM (
    SELECT generate_series(DATE('2010-01-01'), DATE('2010-12-01'), interval '1 month') AS start
  ) months
)
-- For each month, run a query that returns the number of active courses during that month
SELECT * FROM months AS m
LEFT JOIN LATERAL (
  SELECT count(id)
  FROM courses AS c
  WHERE
    -- A course is active during the month if it started on or before the
    -- month's end and has not ended before the month's start (or has no end date)
    c.start <= m."end" AND (c."end" >= m.start OR c."end" IS NULL)
) AS results ON TRUE;

Postgres SQL query for extracting and querying a JSON object containing an array of JSON objects

Tagged postgres, sql, json, jsonb  Languages json, sql

Postgres 12 supports the SQL/JSON Path Language which makes querying JSON documents a lot easier than before: https://www.postgresql.org/docs/12/functions-json.html#FUNCTIONS-SQLJSON-PATH

Example:

'$.track.segments[*].location'
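
Applied to the document structure used below (a messages table with a jsonb body column), the extraction becomes a one-liner on Postgres 12+; a sketch:

-- Returns one row per ssid found in the responses array
SELECT jsonb_path_query(body, '$.responses[*].patient.ssid') FROM messages;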

For older versions we have to do the extraction the hard way…

For example, suppose we want to extract data from a JSONB column in PostgreSQL, where the JSON object (hash, dictionary, map) contains an array of JSON objects:

{
  "responses": [
    {
      "patient": {
        "ssid": "101010-XXXX"
      }
    },
    {
      "patient": {
        "ssid": "070710-XXXX"
      }
    }
  ]
}

In PostgreSQL 9.4 and higher we can write the following query to search nested arrays of objects:

SELECT
   * 
FROM
   messages 
WHERE
   body -> 'responses' @> '[{"patient": {"ssid": "070710-XXXX"}}]';

Alternatively, we can use the jsonb_array_elements function (also available since 9.4):

WITH json_messages AS (
  SELECT jsonb_array_elements(body #> '{responses}') -> 'patient' ->> 'ssid' AS ssid, id FROM messages
)
SELECT * FROM json_messages WHERE ssid = '010150-XXXX';

To extract nested values, we can use a lateral join and jsonb_array_elements. Note that this example uses a different table and different JSON keys:

SELECT DISTINCT(o.data -> 'status')
FROM json_messages m, jsonb_array_elements(m.body_json -> 'reports') d(data), jsonb_array_elements(d.data -> 'results') o(data);


Fetch the status of Postgres' streaming replication with ActiveRecord and Rails

Tagged pg_last_wal_receive_lsn, postgres, replication, streaming, pg_current_wal_flush_lsn  Languages ruby

When using streaming replication, you can use this code to fetch the number of bytes of WAL (write-ahead log) that the slave is behind the master:

class SlaveDatabase < ApplicationRecord
  self.abstract_class = true
  # NOTE: Add slave entry to config/database.yml
  establish_connection :slave

  #
  # Returns the number of bytes the slave is behind the master in applying WAL updates.
  #
  def self.replication_delta
    # Returns, for example, 12/547C3A58 from the master
    current_master_wal = ActiveRecord::Base.connection.select_value("SELECT pg_current_wal_flush_lsn();")
    # Returns, for example, -7504582 from the slave
    delta = connection.select_value("SELECT #{connection.quote(current_master_wal)} - pg_last_wal_receive_lsn()")
    raise "pg_last_wal_receive_lsn returned null, see documentation for details" unless delta
    Integer(delta).abs
  end
end

Tested with Postgres version 10. Note that in version 10, pg_last_xlog_receive_location was renamed to pg_last_wal_receive_lsn, and pg_current_xlog_flush_location was renamed to pg_current_wal_flush_lsn.
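
As a plain-SQL alternative, the master exposes per-standby replication state in pg_stat_replication; a sketch for Postgres 10+:

-- Run on the master: bytes of WAL each standby still has to replay
SELECT client_addr, state,
       pg_wal_lsn_diff(pg_current_wal_flush_lsn(), replay_lsn) AS replay_lag_bytes
FROM pg_stat_replication;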

Repeatable-read vs read-committed example in Rails / ActiveRecord

Tagged isolation, read-committed, repeatable-read, transaction, activerecord, rails, postgres  Languages ruby

ActiveRecord::Base.logger = Logger.new(STDOUT)

require "test/unit/assertions"
include Test::Unit::Assertions

class Cust1 < ActiveRecord::Base
  self.table_name = "customers"
end

class Cust2 < ActiveRecord::Base
  self.table_name = "customers"
end

# NOTE: Two separate connections
Cust1.establish_connection :development
Cust2.establish_connection :development

Cust1.find_by(username: '[email protected]').try(:delete)

c = Cust1.create!(first_name: 'Putin', last_name: 'jebus', password_digest: 'abcdef12345', username: '[email protected]')

case ENV['isolation']
when 'repeatable_read'
  #
  # NOTE: Repeatable-read isolation
  #
  # This level is different from Read Committed in that a query in a repeatable
  # read transaction sees a snapshot as of the start of the transaction, not as
  # of the start of the current query within the transaction. Thus, successive
  # SELECT commands within a single transaction see the same data, i.e., they do
  # not see changes made by other transactions that committed after their own
  # transaction started.
  #
  puts "Repeatable read"
  assert ActiveRecord::Base.connection.transaction_isolation_levels.include?(:repeatable_read)
  Cust1.transaction(isolation: :repeatable_read) do
    c.reload
    Cust2.find(c.id).update_attributes(first_name: 'Stalin')
    c.reload
    assert_equal 'Putin', c.first_name
  end
  c.reload
  assert_equal 'Stalin', c.first_name
else
  #
  # NOTE: Read-committed isolation
  #
  # Read Committed is the default isolation level in PostgreSQL. When a
  # transaction uses this isolation level, a SELECT query (without a FOR
  # UPDATE/SHARE clause) sees only data committed before the query began; it
  # never sees either uncommitted data or changes committed during query
  # execution by concurrent transactions. In effect, a SELECT query sees a
  # snapshot of the database as of the instant the query begins to run.
  # However, SELECT does see the effects of previous updates executed within
  # its own transaction, even though they are not yet committed. Also note that
  # two successive SELECT commands can see different data, even though they are
  # within a single transaction, if other transactions commit changes after the
  # first SELECT starts and before the second SELECT starts.
  #
  puts "Read committed"
  Cust1.transaction do
    c.reload
    Cust2.find(c.id).update_attributes(first_name: 'Stalin')
    c.reload
    assert_equal 'Stalin', c.first_name
  end
  c.reload
  assert_equal 'Stalin', c.first_name
end
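
The same difference can be reproduced in plain SQL with two psql sessions; a sketch that follows the customers table used above:

-- Session 1
BEGIN ISOLATION LEVEL REPEATABLE READ;
SELECT first_name FROM customers WHERE id = 1;  -- => Putin

-- Session 2 (autocommits immediately)
UPDATE customers SET first_name = 'Stalin' WHERE id = 1;

-- Session 1: repeatable read still sees the snapshot taken when the transaction started
SELECT first_name FROM customers WHERE id = 1;  -- => Putin
COMMIT;
SELECT first_name FROM customers WHERE id = 1;  -- => Stalin
-- Under the default read committed level, the second SELECT inside the
-- transaction would already have returned Stalin.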
