Loading...
Loading...
Expert-level SQL database design, querying, optimization, and administration across PostgreSQL, MySQL, and SQL Server
npx skill4agent add personamanagmentlayer/pcl sql-expert-- Users table
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
is_active BOOLEAN DEFAULT true,
CONSTRAINT check_email CHECK (email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z]{2,}$')
);
-- Posts table
CREATE TABLE posts (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
title VARCHAR(200) NOT NULL,
content TEXT NOT NULL,
status VARCHAR(20) DEFAULT 'draft' CHECK (status IN ('draft', 'published', 'archived')),
published_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
INDEX idx_user_id (user_id),
INDEX idx_status (status),
INDEX idx_published_at (published_at)
);
-- Comments table (one-to-many with posts)
CREATE TABLE comments (
id SERIAL PRIMARY KEY,
post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
content TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
INDEX idx_post_id (post_id),
INDEX idx_user_id (user_id)
);
-- Tags table (many-to-many with posts)
CREATE TABLE tags (
id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
slug VARCHAR(50) UNIQUE NOT NULL,
INDEX idx_slug (slug)
);
CREATE TABLE post_tags (
post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY (post_id, tag_id),
INDEX idx_tag_id (tag_id)
);-- Unnormalized (bad)
CREATE TABLE orders_bad (
id INTEGER PRIMARY KEY,
customer_name VARCHAR(100),
customer_email VARCHAR(255),
customer_address TEXT,
product_names TEXT, -- "Book, Pen, Notebook"
product_prices TEXT, -- "10.00, 2.00, 5.00"
total DECIMAL(10,2)
);
-- Normalized (good) - Third Normal Form (3NF)
-- 1. Customers (separate entity)
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
address TEXT
);
-- 2. Products (separate entity)
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200) NOT NULL,
price DECIMAL(10,2) NOT NULL,
description TEXT,
stock_quantity INTEGER DEFAULT 0
);
-- 3. Orders (links customer)
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
customer_id INTEGER NOT NULL REFERENCES customers(id),
order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
total DECIMAL(10,2) NOT NULL,
status VARCHAR(20) DEFAULT 'pending'
);
-- 4. Order Items (many-to-many relationship)
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER NOT NULL REFERENCES orders(id) ON DELETE CASCADE,
product_id INTEGER NOT NULL REFERENCES products(id),
quantity INTEGER NOT NULL CHECK (quantity > 0),
price DECIMAL(10,2) NOT NULL, -- Price at time of order
UNIQUE(order_id, product_id)
);-- INNER JOIN - Only matching records
SELECT
u.username,
p.title,
p.published_at
FROM users u
INNER JOIN posts p ON u.id = p.user_id
WHERE p.status = 'published'
ORDER BY p.published_at DESC;
-- LEFT JOIN - All from left table
SELECT
u.username,
COUNT(p.id) as post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username
ORDER BY post_count DESC;
-- RIGHT JOIN - All from right table
SELECT
p.title,
u.username
FROM posts p
RIGHT JOIN users u ON p.user_id = u.id;
-- FULL OUTER JOIN - All records from both tables
SELECT
u.username,
p.title
FROM users u
FULL OUTER JOIN posts p ON u.id = p.user_id;
-- CROSS JOIN - Cartesian product
SELECT
c.name as category,
s.size
FROM categories c
CROSS JOIN sizes s;
-- SELF JOIN - Join table to itself
SELECT
e1.name as employee,
e2.name as manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.id;
-- Multiple joins
SELECT
u.username,
p.title,
COUNT(c.id) as comment_count,
STRING_AGG(t.name, ', ') as tags
FROM users u
INNER JOIN posts p ON u.id = p.user_id
LEFT JOIN comments c ON p.id = c.post_id
LEFT JOIN post_tags pt ON p.id = pt.post_id
LEFT JOIN tags t ON pt.tag_id = t.id
WHERE p.status = 'published'
GROUP BY u.id, u.username, p.id, p.title
HAVING COUNT(c.id) > 5
ORDER BY comment_count DESC;-- Scalar subquery (single value)
SELECT
username,
(SELECT COUNT(*) FROM posts WHERE user_id = u.id) as post_count
FROM users u;
-- IN subquery
SELECT username
FROM users
WHERE id IN (
SELECT user_id
FROM posts
WHERE status = 'published'
GROUP BY user_id
HAVING COUNT(*) > 10
);
-- EXISTS subquery (efficient for existence checks)
SELECT u.username
FROM users u
WHERE EXISTS (
SELECT 1
FROM posts p
WHERE p.user_id = u.id
AND p.status = 'published'
);
-- NOT EXISTS
SELECT u.username
FROM users u
WHERE NOT EXISTS (
SELECT 1
FROM posts p
WHERE p.user_id = u.id
);
-- Correlated subquery
SELECT
p.title,
p.created_at,
(
SELECT COUNT(*)
FROM posts p2
WHERE p2.user_id = p.user_id
AND p2.created_at < p.created_at
) as previous_post_count
FROM posts p;
-- Subquery in FROM (derived table)
SELECT
user_stats.username,
user_stats.post_count,
user_stats.avg_comments
FROM (
SELECT
u.id,
u.username,
COUNT(DISTINCT p.id) as post_count,
AVG(comment_counts.cnt) as avg_comments
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
LEFT JOIN (
SELECT post_id, COUNT(*) as cnt
FROM comments
GROUP BY post_id
) comment_counts ON p.id = comment_counts.post_id
GROUP BY u.id, u.username
) user_stats
WHERE user_stats.post_count > 5;-- ROW_NUMBER - Assign sequential numbers
SELECT
username,
created_at,
ROW_NUMBER() OVER (ORDER BY created_at) as signup_order
FROM users;
-- RANK and DENSE_RANK
SELECT
username,
post_count,
RANK() OVER (ORDER BY post_count DESC) as rank,
DENSE_RANK() OVER (ORDER BY post_count DESC) as dense_rank
FROM (
SELECT u.username, COUNT(p.id) as post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username
) user_posts;
-- PARTITION BY - Window function per group
SELECT
p.title,
p.user_id,
p.created_at,
ROW_NUMBER() OVER (PARTITION BY p.user_id ORDER BY p.created_at DESC) as post_rank
FROM posts p;
-- Get most recent post per user
SELECT * FROM (
SELECT
p.*,
ROW_NUMBER() OVER (PARTITION BY p.user_id ORDER BY p.created_at DESC) as rn
FROM posts p
) ranked_posts
WHERE rn = 1;
-- Running total
SELECT
order_date,
total,
SUM(total) OVER (ORDER BY order_date) as running_total
FROM orders;
-- Moving average
SELECT
order_date,
total,
AVG(total) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as moving_avg_7_days
FROM orders;
-- LAG and LEAD
SELECT
order_date,
total,
LAG(total) OVER (ORDER BY order_date) as previous_total,
LEAD(total) OVER (ORDER BY order_date) as next_total,
total - LAG(total) OVER (ORDER BY order_date) as change_from_previous
FROM orders;
-- NTILE - Divide into N buckets
SELECT
username,
post_count,
NTILE(4) OVER (ORDER BY post_count DESC) as quartile
FROM (
SELECT u.username, COUNT(p.id) as post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username
) user_posts;-- Simple CTE
WITH published_posts AS (
SELECT *
FROM posts
WHERE status = 'published'
)
SELECT
u.username,
COUNT(pp.id) as published_count
FROM users u
LEFT JOIN published_posts pp ON u.id = pp.user_id
GROUP BY u.id, u.username;
-- Multiple CTEs
WITH
user_posts AS (
SELECT user_id, COUNT(*) as post_count
FROM posts
GROUP BY user_id
),
user_comments AS (
SELECT user_id, COUNT(*) as comment_count
FROM comments
GROUP BY user_id
)
SELECT
u.username,
COALESCE(up.post_count, 0) as posts,
COALESCE(uc.comment_count, 0) as comments
FROM users u
LEFT JOIN user_posts up ON u.id = up.user_id
LEFT JOIN user_comments uc ON u.id = uc.user_id;
-- Recursive CTE (hierarchical data)
WITH RECURSIVE employee_hierarchy AS (
-- Base case: top-level employees
SELECT
id,
name,
manager_id,
1 as level,
name as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: employees with managers
SELECT
e.id,
e.name,
e.manager_id,
eh.level + 1,
eh.path || ' -> ' || e.name
FROM employees e
INNER JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT * FROM employee_hierarchy
ORDER BY path;-- B-tree index (default, most common)
CREATE INDEX idx_users_email ON users(email);
-- Composite index (multiple columns)
CREATE INDEX idx_posts_user_status ON posts(user_id, status);
-- Unique index
CREATE UNIQUE INDEX idx_users_username ON users(username);
-- Partial index (filtered)
CREATE INDEX idx_active_users ON users(created_at)
WHERE is_active = true;
-- Expression index
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Full-text search index (PostgreSQL)
CREATE INDEX idx_posts_content_fts ON posts
USING GIN(to_tsvector('english', content));
-- Covering index (include columns)
CREATE INDEX idx_posts_user_covering ON posts(user_id)
INCLUDE (title, created_at);
-- Hash index (PostgreSQL, equality only)
CREATE INDEX idx_users_id_hash ON users USING HASH(id);-- Use EXPLAIN to analyze queries
EXPLAIN ANALYZE
SELECT u.username, COUNT(p.id) as post_count
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username;
-- Avoid SELECT *
-- Bad
SELECT * FROM users WHERE id = 1;
-- Good
SELECT id, username, email FROM users WHERE id = 1;
-- Use LIMIT for pagination
SELECT id, title, created_at
FROM posts
ORDER BY created_at DESC
LIMIT 10 OFFSET 20;
-- Avoid functions on indexed columns in WHERE
-- Bad (index not used)
SELECT * FROM users WHERE UPPER(email) = 'ALICE@EXAMPLE.COM';
-- Good (index used)
SELECT * FROM users WHERE email = 'alice@example.com';
-- Use EXISTS instead of COUNT for existence checks
-- Bad
SELECT * FROM users WHERE (SELECT COUNT(*) FROM posts WHERE user_id = users.id) > 0;
-- Good
SELECT * FROM users WHERE EXISTS (SELECT 1 FROM posts WHERE user_id = users.id);
-- Use IN for small lists, JOIN for large lists
-- Good for small lists
SELECT * FROM posts WHERE user_id IN (1, 2, 3);
-- Better for large lists
SELECT p.*
FROM posts p
INNER JOIN user_list ul ON p.user_id = ul.id;-- Force index usage (MySQL)
SELECT * FROM posts FORCE INDEX (idx_user_id) WHERE user_id = 1;
-- Disable query cache (MySQL)
SELECT SQL_NO_CACHE * FROM users;
-- Parallel query (PostgreSQL)
SET max_parallel_workers_per_gather = 4;
SELECT COUNT(*) FROM large_table;
-- Analyze table statistics
ANALYZE users;
ANALYZE posts;
-- Vacuum (PostgreSQL)
VACUUM ANALYZE users;-- Basic transaction
BEGIN;
INSERT INTO users (username, email, password_hash)
VALUES ('alice', 'alice@example.com', 'hash123');
INSERT INTO posts (user_id, title, content)
VALUES (LAST_INSERT_ID(), 'First Post', 'Hello World');
COMMIT;
-- Rollback on error
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- If any error occurs:
ROLLBACK;
-- Savepoints
BEGIN;
INSERT INTO users (username, email) VALUES ('bob', 'bob@example.com');
SAVEPOINT sp1;
INSERT INTO posts (user_id, title) VALUES (1, 'Test');
-- Error occurs
ROLLBACK TO SAVEPOINT sp1; -- Rollback to savepoint, keep user insert
COMMIT;-- Read Uncommitted (lowest isolation, dirty reads possible)
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
-- Read Committed (default in PostgreSQL, MySQL)
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- Repeatable Read (default in MySQL InnoDB)
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
-- Serializable (highest isolation, slowest)
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- Example
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
BEGIN;
SELECT * FROM accounts WHERE id = 1; -- balance = 100
-- Another transaction updates balance to 200
SELECT * FROM accounts WHERE id = 1; -- still 100 (repeatable read)
COMMIT;-- Row-level locks (PostgreSQL)
SELECT * FROM users WHERE id = 1 FOR UPDATE; -- Exclusive lock
SELECT * FROM users WHERE id = 1 FOR SHARE; -- Shared lock
-- Lock timeout
SET lock_timeout = '5s';
SELECT * FROM users WHERE id = 1 FOR UPDATE;
-- Skip locked rows
SELECT * FROM queue
WHERE processed = false
FOR UPDATE SKIP LOCKED
LIMIT 10;
-- Table-level locks
LOCK TABLE users IN EXCLUSIVE MODE;CREATE OR REPLACE FUNCTION create_post_with_tags(
p_user_id INTEGER,
p_title VARCHAR,
p_content TEXT,
p_tag_names VARCHAR[]
) RETURNS INTEGER AS $$
DECLARE
v_post_id INTEGER;
v_tag_name VARCHAR;
v_tag_id INTEGER;
BEGIN
-- Create post
INSERT INTO posts (user_id, title, content, status)
VALUES (p_user_id, p_title, p_content, 'draft')
RETURNING id INTO v_post_id;
-- Add tags
FOREACH v_tag_name IN ARRAY p_tag_names
LOOP
-- Get or create tag
INSERT INTO tags (name, slug)
VALUES (v_tag_name, LOWER(REPLACE(v_tag_name, ' ', '-')))
ON CONFLICT (name) DO NOTHING
RETURNING id INTO v_tag_id;
IF v_tag_id IS NULL THEN
SELECT id INTO v_tag_id FROM tags WHERE name = v_tag_name;
END IF;
-- Link tag to post
INSERT INTO post_tags (post_id, tag_id)
VALUES (v_post_id, v_tag_id)
ON CONFLICT DO NOTHING;
END LOOP;
RETURN v_post_id;
END;
$$ LANGUAGE plpgsql;
-- Usage
SELECT create_post_with_tags(1, 'My Post', 'Content here', ARRAY['sql', 'database']);-- Update timestamp trigger
CREATE OR REPLACE FUNCTION update_modified_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_users_modtime
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_modified_column();
-- Audit log trigger
CREATE TABLE audit_log (
id SERIAL PRIMARY KEY,
table_name VARCHAR(50),
operation VARCHAR(10),
old_data JSONB,
new_data JSONB,
changed_by VARCHAR(100),
changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE OR REPLACE FUNCTION audit_trigger_func()
RETURNS TRIGGER AS $$
BEGIN
IF (TG_OP = 'DELETE') THEN
INSERT INTO audit_log (table_name, operation, old_data, changed_by)
VALUES (TG_TABLE_NAME, TG_OP, row_to_json(OLD), current_user);
RETURN OLD;
ELSIF (TG_OP = 'UPDATE') THEN
INSERT INTO audit_log (table_name, operation, old_data, new_data, changed_by)
VALUES (TG_TABLE_NAME, TG_OP, row_to_json(OLD), row_to_json(NEW), current_user);
RETURN NEW;
ELSIF (TG_OP = 'INSERT') THEN
INSERT INTO audit_log (table_name, operation, new_data, changed_by)
VALUES (TG_TABLE_NAME, TG_OP, row_to_json(NEW), current_user);
RETURN NEW;
END IF;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER users_audit
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION audit_trigger_func();-- Simple view
CREATE VIEW active_users AS
SELECT id, username, email, created_at
FROM users
WHERE is_active = true;
-- Materialized view (PostgreSQL) - cached results
CREATE MATERIALIZED VIEW user_post_stats AS
SELECT
u.id,
u.username,
COUNT(p.id) as post_count,
MAX(p.created_at) as last_post_at
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
GROUP BY u.id, u.username;
-- Refresh materialized view
REFRESH MATERIALIZED VIEW user_post_stats;
-- Updatable view
CREATE VIEW recent_posts AS
SELECT id, user_id, title, content
FROM posts
WHERE created_at > CURRENT_DATE - INTERVAL '7 days';
-- Can INSERT/UPDATE/DELETE through this view-- JSON column
CREATE TABLE events (
id SERIAL PRIMARY KEY,
event_type VARCHAR(50),
data JSONB, -- JSONB is faster than JSON
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insert JSON
INSERT INTO events (event_type, data)
VALUES ('user_signup', '{"email": "alice@example.com", "referrer": "google"}');
-- Query JSON
SELECT * FROM events
WHERE data->>'event_type' = 'user_signup';
SELECT * FROM events
WHERE data->'user'->>'age' > '18';
-- JSON array contains
SELECT * FROM events
WHERE data->'tags' @> '["sql"]';
-- JSON path query
SELECT * FROM events
WHERE data @@ '$.user.age > 18';
-- Update JSON
UPDATE events
SET data = jsonb_set(data, '{user,verified}', 'true')
WHERE id = 1;
-- JSON aggregation
SELECT
event_type,
json_agg(data) as events
FROM events
GROUP BY event_type;-- Prevent SQL injection
-- Bad (vulnerable)
query = "SELECT * FROM users WHERE email = '" + userInput + "'";
-- Good (safe)
PREPARE stmt FROM 'SELECT * FROM users WHERE email = ?';
EXECUTE stmt USING @email;1NF: Atomic values, no repeating groups
2NF: 1NF + no partial dependencies
3NF: 2NF + no transitive dependencies
Denormalize only for performance when needed-- Enforce referential integrity
CREATE TABLE posts (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
title VARCHAR(200) NOT NULL
);-- Index foreign keys
CREATE INDEX idx_posts_user_id ON posts(user_id);
-- Index columns used in WHERE, JOIN, ORDER BY
CREATE INDEX idx_posts_created_at ON posts(created_at);
-- Don't over-index (slows writes)CREATE TABLE users (
id SERIAL PRIMARY KEY,
age INTEGER CHECK (age >= 0 AND age <= 150),
email VARCHAR(255) NOT NULL UNIQUE,
status VARCHAR(20) DEFAULT 'active'
CHECK (status IN ('active', 'inactive', 'banned'))
);-- Bad - multiple inserts
INSERT INTO users (name) VALUES ('Alice');
INSERT INTO users (name) VALUES ('Bob');
INSERT INTO users (name) VALUES ('Charlie');
-- Good - single insert
INSERT INTO users (name) VALUES
('Alice'),
('Bob'),
('Charlie');