Loading...
Loading...
Diagnoses live CockroachDB cluster performance issues by identifying long-running queries, busy sessions, and active transactions using SQL-only interfaces. Use when users report cluster slowness, high CPU, or need to find runaway queries and their source applications without DB Console access.
npx skill4agent add cockroachlabs/cockroachdb-skills triaging-live-sql-activitycrdb_internalVIEWACTIVITYVIEWACTIVITYREDACTEDVIEWACTIVITYREDACTEDVIEWACTIVITYCANCELQUERYCANCELSESSIONSHOW GRANTS ON ROLE <username>;| Interface | Purpose | Cluster-wide? |
|---|---|---|
| Currently executing queries | Yes (with VIEWACTIVITY) |
| Active client sessions | Yes (with VIEWACTIVITY) |
| In-progress transactions | Yes (with VIEWACTIVITY) |
-- Queries running longer than 5 minutes
WITH q AS (SHOW CLUSTER STATEMENTS)
SELECT
query_id,
node_id,
session_id,
user_name,
client_address,
application_name,
start,
now() - start AS running_for,
substring(query, 1, 200) AS query_preview,
distributed,
phase
FROM q
WHERE start < now() - INTERVAL '5 minutes'
ORDER BY start
LIMIT 50;running_forquery_previewphasedistributedINTERVAL '5 minutes''10 minutes''30 seconds'LIMIT-- Sessions with active queries running > 5 minutes
WITH s AS (SHOW CLUSTER SESSIONS)
SELECT
node_id,
session_id,
user_name,
client_address,
application_name,
status,
active_query_start,
now() - active_query_start AS active_query_for,
substring(active_queries, 1, 200) AS active_queries_preview,
substring(last_active_query, 1, 200) AS last_query_preview
FROM s
WHERE active_query_start IS NOT NULL
AND active_query_start < now() - INTERVAL '5 minutes'
ORDER BY active_query_start
LIMIT 50;active_query_forapplication_nameclient_addressstatus-- Transactions running > 5 minutes
SELECT
id AS txn_id,
node_id,
session_id,
application_name,
start,
now() - start AS running_for,
num_stmts,
num_retries,
num_auto_retries,
substring(txn_string, 1, 200) AS txn_string_preview
FROM crdb_internal.cluster_transactions
WHERE start < now() - INTERVAL '5 minutes'
ORDER BY start
LIMIT 50;num_retriesnum_auto_retriesnum_stmtstxn_stringcrdb_internal.cluster_transactions-- All activity from specific application
WITH q AS (SHOW CLUSTER STATEMENTS)
SELECT query_id, user_name, start, now() - start AS running_for,
substring(query, 1, 200) AS query_preview
FROM q
WHERE application_name = 'payments-api'
ORDER BY start;-- All activity from specific user
WITH s AS (SHOW CLUSTER SESSIONS)
SELECT session_id, application_name, client_address,
active_query_start, substring(active_queries, 1, 200) AS active_queries_preview
FROM s
WHERE user_name = 'app_user'
AND active_query_start IS NOT NULL
ORDER BY active_query_start;-- All sessions from specific client IP
WITH s AS (SHOW CLUSTER SESSIONS)
SELECT session_id, user_name, application_name,
status, substring(active_queries, 1, 200) AS active_queries_preview
FROM s
WHERE client_address LIKE '10.0.1.%'
ORDER BY active_query_start;-- Long queries from specific app and user
WITH q AS (SHOW CLUSTER STATEMENTS)
SELECT query_id, node_id, start, now() - start AS running_for,
substring(query, 1, 200) AS query_preview
FROM q
WHERE application_name = 'payments-api'
AND user_name = 'app_user'
AND start < now() - INTERVAL '10 minutes'
ORDER BY start;SHOWcrdb_internal.cluster_transactionsCANCELQUERYCANCELSESSION-- 1. Identify the query_id from triage queries above
-- 2. Cancel it
CANCEL QUERY '<query_id>';CANCEL QUERY '15f9e0e91f072f0f0000000000000001';-- 1. Identify the session_id from triage queries above
-- 2. Cancel all queries in that session
CANCEL SESSION '<session_id>';CANCEL SESSION '15f9e0e91f072f0f';CANCELQUERYCANCELSESSION-- Run the "Long-Running Queries" diagnostic
-- Look for queries running > 5-10 minutes-- Group by application to find culprits
WITH q AS (SHOW CLUSTER STATEMENTS)
SELECT application_name, COUNT(*) AS num_queries,
AVG(now() - start) AS avg_duration
FROM q
WHERE start < now() - INTERVAL '5 minutes'
GROUP BY application_name
ORDER BY num_queries DESC;-- Filter by top application from step 2
-- Use "Filter by Application" querySELECT application_name, AVG(num_retries) AS avg_retries,
MAX(num_retries) AS max_retries, COUNT(*) AS num_txns
FROM crdb_internal.cluster_transactions
WHERE start < now() - INTERVAL '5 minutes'
GROUP BY application_name
HAVING AVG(num_retries) > 5
ORDER BY avg_retries DESC;-- Find transactions with >10 retries
SELECT id, application_name, num_retries, num_stmts,
substring(txn_string, 1, 200) AS txn_preview
FROM crdb_internal.cluster_transactions
WHERE num_retries > 10
ORDER BY num_retries DESC;WITH q AS (SHOW CLUSTER STATEMENTS)
SELECT user_name, COUNT(*) AS num_active_queries,
AVG(now() - start) AS avg_duration
FROM q
GROUP BY user_name
ORDER BY num_active_queries DESC;-- Use "Filter by User" query| Issue | Cause | Fix |
|---|---|---|
| No active queries, or insufficient privileges | Grant |
Query text shows | Using | This is expected for privacy; use |
| Can't cancel query: "permission denied" | Missing | Grant |
| High transaction volume on cluster | Add filters (application_name, time threshold) to reduce result set |
| "relation does not exist" error | Typo in table name or old CockroachDB version | Verify you're using production-approved tables; check CockroachDB version compatibility |
| Triage queries themselves are slow | Cluster under extreme load | Use more aggressive filters (shorter time window, specific apps); consider canceling obvious runaway work first |
VIEWACTIVITYREDACTEDVIEWACTIVITYLIMITINTERVALSHOW CLUSTER STATEMENTSSHOW CLUSTER SESSIONScrdb_internal.cluster_transactions