Expert-level PostgreSQL database administration, advanced queries, performance tuning, and production operations
npx skill4agent add personamanagmentlayer/pcl postgresql-expert
-- Create table with JSONB
CREATE TABLE events (
id SERIAL PRIMARY KEY,
event_type VARCHAR(50),
data JSONB NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insert JSON data
INSERT INTO events (event_type, data) VALUES
('user_signup', '{"email": "alice@example.com", "referrer": "google"}'),
('purchase', '{"product_id": 123, "amount": 99.99, "currency": "USD"}');
-- Query JSON
SELECT * FROM events WHERE data->>'email' = 'alice@example.com';
SELECT * FROM events WHERE (data->>'amount')::NUMERIC > 50; -- cast for a numeric comparison
SELECT * FROM events WHERE data @> '{"currency": "USD"}';
-- Extract JSON values
SELECT
event_type,
data->>'email' as email,
(data->>'amount')::NUMERIC as amount
FROM events;
-- JSON operators
-- -> get JSON object field
-- ->> get JSON object field as text
-- #> get JSON object at path
-- #>> get JSON object at path as text
-- @> contains
-- <@ is contained by
-- ? has key
-- ?| has any keys
-- ?& has all keys
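-- A quick sketch of the path/key operators above against the events table
-- (keys match the sample rows inserted earlier):
SELECT data #>> '{product_id}' AS product_id FROM events WHERE data ? 'amount';
SELECT * FROM events WHERE data ?| ARRAY['email', 'referrer'];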
-- Update JSON
UPDATE events
SET data = jsonb_set(data, '{verified}', 'true')
WHERE event_type = 'user_signup';
-- Remove JSON key
UPDATE events
SET data = data - 'temp_field'
WHERE id = 1;
-- JSON aggregation
SELECT
event_type,
jsonb_agg(data) as all_events
FROM events
GROUP BY event_type;
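-- Building JSON from row values (sketch):
SELECT jsonb_build_object('type', event_type, 'payload', data) AS event_json
FROM events;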
-- Array columns
CREATE TABLE users (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
tags TEXT[],
scores INTEGER[]
);
-- Insert arrays
INSERT INTO users (name, tags, scores) VALUES
('Alice', ARRAY['admin', 'developer'], ARRAY[95, 87, 92]),
('Bob', ARRAY['user', 'viewer'], ARRAY[78, 85]);
-- Query arrays
SELECT * FROM users WHERE 'admin' = ANY(tags);
SELECT * FROM users WHERE tags @> ARRAY['developer'];
SELECT * FROM users WHERE tags && ARRAY['admin', 'moderator']; -- Overlaps
-- Array functions
-- (set-returning functions can't be nested inside aggregates;
-- aggregate over unnest() in a scalar subquery instead)
SELECT
name,
array_length(tags, 1) as tag_count,
(SELECT AVG(s) FROM unnest(scores) s) as avg_score
FROM users;
-- Unnest array
SELECT
name,
unnest(tags) as tag
FROM users;
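-- Modifying arrays in place (sketch, using the users table above):
UPDATE users SET tags = array_append(tags, 'beta') WHERE name = 'Alice';
UPDATE users SET tags = array_remove(tags, 'viewer') WHERE name = 'Bob';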
-- Enable UUID extension
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
email VARCHAR(255) UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insert with UUID
INSERT INTO users (email) VALUES ('alice@example.com');
-- Query by UUID
SELECT * FROM users WHERE id = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
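-- On PostgreSQL 13+, the built-in gen_random_uuid() avoids the extension:
-- id UUID PRIMARY KEY DEFAULT gen_random_uuid()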
-- Range types (DATERANGE shown here with an exclusion constraint)
CREATE EXTENSION IF NOT EXISTS btree_gist; -- needed for room_id WITH = in the exclusion below
CREATE TABLE reservations (
id SERIAL PRIMARY KEY,
room_id INTEGER,
dates DATERANGE NOT NULL,
EXCLUDE USING GIST (room_id WITH =, dates WITH &&)
);
-- Insert ranges
INSERT INTO reservations (room_id, dates) VALUES
(101, '[2024-01-01,2024-01-05)');
-- Query ranges
SELECT * FROM reservations
WHERE dates @> '2024-01-03'::DATE;
SELECT * FROM reservations
WHERE dates && '[2024-01-02,2024-01-06)'::DATERANGE;
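-- Range helpers (sketch): lower()/upper() extract bounds, * computes the intersection
SELECT room_id, lower(dates) AS check_in, upper(dates) AS check_out,
dates * '[2024-01-03,2024-01-10)'::DATERANGE AS overlap
FROM reservations;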
-- Create table with full-text search
CREATE TABLE articles (
id SERIAL PRIMARY KEY,
title TEXT,
content TEXT,
search_vector tsvector
);
-- Generate tsvector
UPDATE articles
SET search_vector =
setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
setweight(to_tsvector('english', COALESCE(content, '')), 'B');
-- Trigger to automatically update search_vector
CREATE FUNCTION articles_search_trigger() RETURNS TRIGGER AS $$
BEGIN
NEW.search_vector :=
setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
setweight(to_tsvector('english', COALESCE(NEW.content, '')), 'B');
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER articles_search_update
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION articles_search_trigger();
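-- On PostgreSQL 12+, a stored generated column can replace the trigger
-- (the two-argument to_tsvector form is immutable, so this is allowed):
-- search_vector tsvector GENERATED ALWAYS AS (
--   setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
--   setweight(to_tsvector('english', COALESCE(content, '')), 'B')
-- ) STORED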
-- Create GIN index for search
CREATE INDEX articles_search_idx ON articles USING GIN(search_vector);
-- Search queries
SELECT * FROM articles
WHERE search_vector @@ to_tsquery('english', 'postgresql & performance');
SELECT * FROM articles
WHERE search_vector @@ to_tsquery('english', 'database | sql');
-- Ranked search results
SELECT
id,
title,
ts_rank(search_vector, query) AS rank
FROM articles, to_tsquery('english', 'postgresql & optimization') query
WHERE search_vector @@ query
ORDER BY rank DESC;
-- Highlighted search results
SELECT
id,
title,
ts_headline('english', content, query) as highlighted
FROM articles, to_tsquery('english', 'postgresql') query
WHERE search_vector @@ query;
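-- Free-form query input (websearch_to_tsquery is PostgreSQL 11+):
SELECT * FROM articles
WHERE search_vector @@ websearch_to_tsquery('english', '"query planner" -mysql');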
-- B-tree (default, for =, <, <=, >, >=)
CREATE INDEX idx_users_email ON users(email);
-- Hash (for = only, faster but fewer features)
CREATE INDEX idx_users_email_hash ON users USING HASH(email);
-- GIN (for full-text search, JSONB, arrays)
CREATE INDEX idx_events_data ON events USING GIN(data);
CREATE INDEX idx_users_tags ON users USING GIN(tags);
-- GiST (for geometric data, full-text search)
CREATE INDEX idx_locations ON locations USING GIST(coordinates);
-- BRIN (for large tables with natural ordering)
CREATE INDEX idx_logs_created ON logs USING BRIN(created_at);
-- Partial indexes (filtered)
CREATE INDEX idx_active_users ON users(email)
WHERE is_active = true AND deleted_at IS NULL;
-- Expression indexes
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Multi-column indexes
CREATE INDEX idx_orders_user_date ON orders(user_id, created_at DESC);
-- Covering indexes (INCLUDE clause)
CREATE INDEX idx_users_email_covering ON users(email)
INCLUDE (name, created_at);
-- Unique indexes
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);
-- Concurrent index creation (no table lock)
CREATE INDEX CONCURRENTLY idx_users_name ON users(name);
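-- GIN variant for JSONB (sketch): jsonb_path_ops is smaller but supports only containment (@>)
CREATE INDEX idx_events_data_path ON events USING GIN(data jsonb_path_ops);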
-- List indexes
SELECT
schemaname,
tablename,
indexname,
indexdef
FROM pg_indexes
WHERE tablename = 'users';
-- Index size
SELECT
indexname,
pg_size_pretty(pg_relation_size(format('%I.%I', schemaname, indexname)::regclass)) as size -- schema-qualify so it works outside search_path
FROM pg_indexes
WHERE tablename = 'users';
-- Unused indexes
SELECT
ui.schemaname || '.' || ui.relname AS table_name,
ui.indexrelname AS index_name,
pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size,
ui.idx_scan AS index_scans
FROM pg_stat_user_indexes ui
JOIN pg_index i ON ui.indexrelid = i.indexrelid
WHERE NOT i.indisunique
AND ui.idx_scan < 50
AND pg_relation_size(i.indexrelid) > 5 * 8192 -- larger than 5 pages (40 kB)
ORDER BY pg_relation_size(i.indexrelid) DESC;
-- Rebuild index
REINDEX INDEX idx_users_email;
REINDEX TABLE users;
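-- PostgreSQL 12+ can rebuild without blocking writes:
REINDEX INDEX CONCURRENTLY idx_users_email;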
-- Drop index
DROP INDEX idx_users_email;
DROP INDEX CONCURRENTLY idx_users_email; -- Without table lock
-- Running total
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) as running_total
FROM orders;
-- Moving average
SELECT
date,
value,
AVG(value) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as moving_avg_7_days
FROM metrics;
-- Row number within partition
SELECT
user_id,
order_date,
amount,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY order_date DESC) as rn
FROM orders;
-- Get most recent order per user
SELECT * FROM (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) as rn
FROM orders
) ranked
WHERE rn = 1;
-- Rank and dense_rank
SELECT
name,
score,
RANK() OVER (ORDER BY score DESC) as rank,
DENSE_RANK() OVER (ORDER BY score DESC) as dense_rank,
PERCENT_RANK() OVER (ORDER BY score) as percentile
FROM students;
-- LAG and LEAD
SELECT
date,
value,
LAG(value) OVER (ORDER BY date) as previous_value,
LEAD(value) OVER (ORDER BY date) as next_value,
value - LAG(value) OVER (ORDER BY date) as change
FROM metrics;
-- NTILE (divide into buckets)
SELECT
name,
salary,
NTILE(4) OVER (ORDER BY salary DESC) as quartile
FROM employees;
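-- FIRST_VALUE / LAST_VALUE (sketch; LAST_VALUE needs an explicit frame,
-- since the default frame stops at the current row):
SELECT
name,
salary,
FIRST_VALUE(name) OVER w AS highest_paid,
LAST_VALUE(name) OVER (w ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid
FROM employees
WINDOW w AS (ORDER BY salary DESC);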
-- Employee hierarchy
WITH RECURSIVE employee_tree AS (
-- Base case: top-level employees
SELECT
id,
name,
manager_id,
1 as level,
name::TEXT as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case
SELECT
e.id,
e.name,
e.manager_id,
et.level + 1,
et.path || ' -> ' || e.name
FROM employees e
INNER JOIN employee_tree et ON e.manager_id = et.id
)
SELECT * FROM employee_tree
ORDER BY path;
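-- If manager links can form cycles, PostgreSQL 14+ has a built-in guard (sketch):
-- WITH RECURSIVE employee_tree AS (...) CYCLE id SET is_cycle USING path_ids
-- SELECT * FROM employee_tree WHERE NOT is_cycle;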
-- Calculate factorial
WITH RECURSIVE factorial(n, fact) AS (
SELECT 1, 1
UNION ALL
SELECT n + 1, fact * (n + 1)
FROM factorial
WHERE n < 10
)
SELECT * FROM factorial;
-- Generate series alternative
WITH RECURSIVE numbers(n) AS (
SELECT 1
UNION ALL
SELECT n + 1 FROM numbers WHERE n < 100
)
SELECT * FROM numbers;
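-- The built-in equivalent:
SELECT * FROM generate_series(1, 100) AS n;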
-- Get the 3 most recent orders per user
SELECT
u.name,
o.order_date,
o.total
FROM users u
CROSS JOIN LATERAL (
SELECT order_date, total
FROM orders
WHERE user_id = u.id
ORDER BY order_date DESC
LIMIT 3
) o;
-- Complex aggregations
SELECT
u.name,
stats.order_count,
stats.total_spent,
stats.avg_order
FROM users u
LEFT JOIN LATERAL (
SELECT
COUNT(*) as order_count,
SUM(total) as total_spent,
AVG(total) as avg_order
FROM orders
WHERE user_id = u.id
) stats ON true;
-- See query plan
EXPLAIN SELECT * FROM users WHERE email = 'alice@example.com';
-- See actual execution
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'alice@example.com';
-- More details
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT u.name, COUNT(o.id)
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
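-- Sketch: verify an index actually changes the plan (expect Seq Scan -> Index Scan;
-- table and column names are illustrative):
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 42;
CREATE INDEX idx_orders_user_id ON orders(user_id);
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 42;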
-- Look for:
-- - Seq Scan (bad for large tables)
-- - Index Scan (good)
-- - High cost values
-- - Slow execution time
-- - Large buffer reads
-- Use indexes
CREATE INDEX idx_users_email ON users(email);
-- Avoid SELECT *
-- Bad
SELECT * FROM users;
-- Good
SELECT id, name, email FROM users;
-- Use LIMIT
SELECT id, name FROM users ORDER BY created_at DESC LIMIT 10;
-- Avoid functions on indexed columns in WHERE
-- Bad (index not used)
SELECT * FROM users WHERE UPPER(email) = 'ALICE@EXAMPLE.COM';
-- Good (index used)
SELECT * FROM users WHERE email = 'alice@example.com';
-- Or use expression index
CREATE INDEX idx_users_email_upper ON users(UPPER(email));
-- Use EXISTS instead of COUNT
-- Bad
SELECT * FROM users WHERE (SELECT COUNT(*) FROM orders WHERE user_id = users.id) > 0;
-- Good
SELECT * FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE user_id = users.id);
-- Partition large tables
CREATE TABLE orders_2024_01 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
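-- (assumes the parent table was declared partitioned, e.g.:)
-- CREATE TABLE orders (...) PARTITION BY RANGE (created_at);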
-- Use appropriate JOIN type
-- INNER JOIN when both sides must match
-- LEFT JOIN when left side is needed regardless
-- Avoid RIGHT JOIN (use LEFT JOIN instead)
-- Use connection pooler like PgBouncer
-- Configure in application:
DATABASE_URL=postgresql://user:pass@pgbouncer:6432/mydb?pool_timeout=10&pool_size=20
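# The URL query parameters above are client-library specific (Prisma-style, an assumption).
# PgBouncer itself is configured in pgbouncer.ini; a minimal sketch (values are illustrative):
# [databases]
# mydb = host=127.0.0.1 port=5432 dbname=mydb
# [pgbouncer]
# listen_port = 6432
# pool_mode = transaction
# max_client_conn = 200
# default_pool_size = 20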
-- Read Committed (default)
BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- Repeatable Read
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
-- Serializable
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- Example
BEGIN;
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;
-- Row-level locks
SELECT * FROM users WHERE id = 1 FOR UPDATE; -- Exclusive lock
SELECT * FROM users WHERE id = 1 FOR SHARE; -- Shared lock
-- Skip locked rows (useful for queues)
SELECT * FROM jobs
WHERE status = 'pending'
ORDER BY created_at
FOR UPDATE SKIP LOCKED
LIMIT 10;
-- Table-level locks
LOCK TABLE users IN EXCLUSIVE MODE;
-- Advisory locks (application-level)
SELECT pg_advisory_lock(123);
-- Do work
SELECT pg_advisory_unlock(123);
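-- Non-blocking variant: returns false immediately instead of waiting
SELECT pg_try_advisory_lock(123);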
-- Check locks
SELECT
pid,
usename,
pg_blocking_pids(pid) as blocked_by,
query
FROM pg_stat_activity
WHERE cardinality(pg_blocking_pids(pid)) > 0;
# Full database backup
pg_dump -U postgres -d mydb -F c -f mydb_backup.dump
# Restore
pg_restore -U postgres -d mydb_restored -F c mydb_backup.dump
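# Parallel restore with the custom format (job count is illustrative):
pg_restore -U postgres -d mydb_restored -j 4 mydb_backup.dump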
# Backup single table
pg_dump -U postgres -d mydb -t users -F c -f users_backup.dump
# Plain SQL backup
pg_dump -U postgres -d mydb -f mydb_backup.sql
# Backup all databases
pg_dumpall -U postgres -f all_databases.sql
# Continuous archiving (point-in-time recovery)
# In postgresql.conf:
wal_level = replica
archive_mode = on
archive_command = 'cp %p /path/to/archive/%f'
-- Manual vacuum
VACUUM users;
VACUUM FULL users; -- Reclaim space (locks table)
VACUUM ANALYZE users; -- Vacuum and update statistics
-- Analyze (update statistics)
ANALYZE users;
-- Autovacuum settings (postgresql.conf)
autovacuum = on
autovacuum_max_workers = 3
autovacuum_naptime = 1min
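-- Per-table tuning for hot tables (sketch):
ALTER TABLE orders SET (autovacuum_vacuum_scale_factor = 0.05);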
-- Table size vs. index/TOAST overhead (true bloat estimation needs the pgstattuple extension)
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as total_size,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) as external_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
-- Current connections
SELECT
datname,
count(*) as connections
FROM pg_stat_activity
GROUP BY datname;
-- Long-running queries
SELECT
pid,
now() - query_start as duration,
query,
state
FROM pg_stat_activity
WHERE state = 'active'
AND now() - query_start > interval '5 minutes'
ORDER BY duration DESC;
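-- With the pg_stat_statements extension (requires shared_preload_libraries),
-- top queries by cumulative time (column names as of PostgreSQL 13):
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
SELECT query, calls, total_exec_time, mean_exec_time
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;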
-- Kill query
SELECT pg_cancel_backend(12345); -- Send SIGINT
SELECT pg_terminate_backend(12345); -- Send SIGTERM
-- Database size
SELECT
datname,
pg_size_pretty(pg_database_size(datname)) as size
FROM pg_database
ORDER BY pg_database_size(datname) DESC;
-- Table sizes
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as total_size,
pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) as table_size,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) as indexes_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
LIMIT 20;
-- Cache hit ratio
SELECT
sum(heap_blks_read) as heap_read,
sum(heap_blks_hit) as heap_hit,
sum(heap_blks_hit) / nullif(sum(heap_blks_hit) + sum(heap_blks_read), 0) as ratio
FROM pg_statio_user_tables;
-- Primary server (postgresql.conf)
wal_level = replica
max_wal_senders = 10
wal_keep_size = 1GB
-- Create replication user
CREATE ROLE replicator WITH REPLICATION LOGIN PASSWORD 'password';
-- Replica server (postgresql.auto.conf; since PostgreSQL 12, recovery.conf is gone and an empty standby.signal file marks a standby)
primary_conninfo = 'host=primary.example.com port=5432 user=replicator password=password'
hot_standby = on
-- Check replication status (on primary)
SELECT
client_addr,
state,
sent_lsn,
write_lsn,
flush_lsn,
replay_lsn,
sync_state
FROM pg_stat_replication;
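-- Lag in bytes per replica (run on the primary):
SELECT client_addr, pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS lag_bytes
FROM pg_stat_replication;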
-- Replication lag (run on the replica)
SELECT
now() - pg_last_xact_replay_timestamp() AS replication_lag;
-- Use specific types
-- Bad: VARCHAR(255) for everything
-- Good: Use appropriate types
email VARCHAR(255)
age INTEGER
price NUMERIC(10,2)
is_active BOOLEAN
created_at TIMESTAMP WITH TIME ZONE
-- Enforce integrity with constraints
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
age INTEGER CHECK (age >= 0 AND age <= 150),
status VARCHAR(20) DEFAULT 'active' CHECK (status IN ('active', 'inactive', 'banned'))
);
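-- On PostgreSQL 10+, prefer identity columns over SERIAL:
-- id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY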
-- Wrap multi-step changes in a transaction
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;
-- Index foreign keys
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- Index columns used in WHERE, JOIN, ORDER BY
CREATE INDEX idx_users_created_at ON users(created_at);
-- Don't over-index (slows writes)
-- Schedule regular VACUUM ANALYZE
-- Monitor slow queries
-- Check for bloat
-- Update statistics
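-- Example: nightly maintenance via cron (role, database, and schedule are assumptions):
-- 0 3 * * * psql -U postgres -d mydb -c 'VACUUM ANALYZE;'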