Loading...
Loading...
This skill should be used when the user needs to query the COSMIC Cancer Gene Census to check if genes are known cancer genes. Triggers include requests to annotate genes with cancer information, check if variants are in cancer genes, or retrieve cancer gene properties from the COSMIC database.
npx skill4agent add dakesan/cc-dnawork-plugin bio-cosmic
uv pip install pandas typer
data/cancer_gene_census.csv
cosmic-toolkit/data/cancer_gene_census.csv
data/README.md
# Query single gene
python scripts/query_cosmic_genes.py --gene TP53
# Query multiple genes
python scripts/query_cosmic_genes.py --genes TP53 BRCA1 EGFR
# Query from file
python scripts/query_cosmic_genes.py --gene-list genes.txt --output results.json
--gene TEXT
--genes TEXT [TEXT ...]
--gene-list PATH
--gene-census PATH
data/cancer_gene_census.csv
--output PATH
{
"summary": {
"total_genes": 3,
"found_in_cancer_census": 2,
"not_found": 1
},
"genes": {
"TP53": {
"found": true,
"Gene Symbol": "TP53",
"Name": "tumor protein p53",
"Entrez GeneId": "7157",
"Genome Location": "17:7661779-7687538",
"Tier": "1",
"Hallmark": "Yes",
"Chr Band": "17p13.1",
"Somatic": "yes",
"Germline": "yes",
"Tumour Types(Somatic)": "lung NS, breast NS, colorectal NS, ...",
"Tumour Types(Germline)": "Li-Fraumeni syndrome",
"Cancer Syndrome": "Li-Fraumeni syndrome",
"Tissue Type": "E",
"Molecular Genetics": "Dom",
"Role in Cancer": "TSG",
"Mutation Types": "Mis, N, F, D"
},
"BRCA1": {
"found": true,
"Gene Symbol": "BRCA1",
"Name": "BRCA1 DNA repair associated",
"Entrez GeneId": "672",
"Genome Location": "17:43044295-43125483",
"Tier": "1",
"Hallmark": "Yes",
"Role in Cancer": "TSG",
"Somatic": "yes",
"Germline": "yes",
"Tumour Types(Somatic)": "breast, ovary",
"Cancer Syndrome": "Breast-ovarian cancer, familial, susceptibility to, 1"
},
"UNKNOWN_GENE": {
"found": false
}
}
}
# Query single gene
python scripts/query_cosmic_genes.py --gene TP53
# Query multiple genes
python scripts/query_cosmic_genes.py --genes TP53 BRCA1 EGFR KRAS
# Query from gene list file
python scripts/query_cosmic_genes.py --gene-list candidate_genes.txt
# Save output to file
python scripts/query_cosmic_genes.py \
--genes TP53 BRCA1 EGFR \
--output cancer_genes.json
# Use custom Cancer Gene Census file
python scripts/query_cosmic_genes.py \
--gene TP53 \
--gene-census /path/to/cancer_gene_census.csv
# Step 1: Extract gene names from VCF (using bcftools or grep)
bcftools query -f '%INFO/GENE\n' variants.vcf | sort -u > candidate_genes.txt
# Step 2: Check which genes are in Cancer Gene Census
python scripts/query_cosmic_genes.py \
--gene-list candidate_genes.txt \
--output cancer_gene_annotation.json
# Step 3: Parse results to filter cancer genes only
jq '.genes | to_entries | map(select(.value.found == true)) | from_entries' cancer_gene_annotation.json
# Query genes
python scripts/query_cosmic_genes.py \
--gene-list genes.txt \
--output results.json
# Filter to Tier 1 genes only
jq '.genes | to_entries | map(select(.value.Tier == "1")) | from_entries' results.json
# Query genes
python scripts/query_cosmic_genes.py \
--genes TP53 BRCA1 EGFR KRAS MYC \
--output cancer_genes.json
# Extract tumor suppressor genes (TSG)
jq '.genes | to_entries | map(select(.value."Role in Cancer" | contains("TSG"))) | from_entries' cancer_genes.json
# Extract oncogenes
jq '.genes | to_entries | map(select(.value."Role in Cancer" | contains("oncogene"))) | from_entries' cancer_genes.json
# Query genes
python scripts/query_cosmic_genes.py \
--gene-list genes.txt \
--output results.json
# Filter germline cancer genes
jq '.genes | to_entries | map(select(.value.Germline == "yes")) | from_entries' results.json
# Filter somatic cancer genes
jq '.genes | to_entries | map(select(.value.Somatic == "yes")) | from_entries' results.json
$ python scripts/query_cosmic_genes.py --gene TP53
Error: Cancer Gene Census file not found at: data/cancer_gene_census.csv
To use this tool, please download COSMIC data:
1. Register for free academic access:
https://cancer.sanger.ac.uk/cosmic/register
2. Download Cancer Gene Census:
https://cancer.sanger.ac.uk/cosmic/download
File: cancer_gene_census.csv (GRCh38)
3. Place the file at:
cosmic-toolkit/data/cancer_gene_census.csv
For more information, see: cosmic-toolkit/data/README.md
data/README.md
$ python scripts/query_cosmic_genes.py
Error: Must specify --gene, --genes, or --gene-list
python scripts/query_cosmic_genes.py --gene TP53
"found": false
{
"UNKNOWN_GENE": {
"found": false
}
}
# Download new version and replace existing file
mv ~/Downloads/cancer_gene_census.csv cosmic-toolkit/data/
# ✅ Good: Use file for many genes
python scripts/query_cosmic_genes.py --gene-list genes.txt
# ❌ Bad: Long command line
python scripts/query_cosmic_genes.py --genes GENE1 GENE2 GENE3 ... GENE100
jq
# Extract only Tier 1 genes
python scripts/query_cosmic_genes.py --gene-list genes.txt | \
jq '.genes | to_entries | map(select(.value.Tier == "1"))'
# Count tumor suppressor genes
python scripts/query_cosmic_genes.py --gene-list genes.txt | \
jq '[.genes[] | select(."Role in Cancer" | contains("TSG"))] | length'
# Extract genes from VCF
bcftools query -f '%INFO/GENE\n' variants.vcf | sort -u > genes.txt
# Annotate with COSMIC
python scripts/query_cosmic_genes.py --gene-list genes.txt --output cosmic_annotation.json
# Filter VCF to cancer genes only (using cancer gene list)
jq -r '.genes | to_entries | map(select(.value.found == true)) | .[].key' cosmic_annotation.json > cancer_genes.txt
bcftools view -i "GENE=@cancer_genes.txt" variants.vcf > cancer_variants.vcf
# 1. Extract genes from VCF
bcftools query -f '%INFO/GENE\n' variants.vcf | sort -u > all_genes.txt
# 2. Query COSMIC
python scripts/query_cosmic_genes.py \
--gene-list all_genes.txt \
--output cosmic_results.json
# 3. Extract cancer gene names
jq -r '.genes | to_entries | map(select(.value.found == true and .value.Tier == "1")) | .[].key' \
cosmic_results.json > tier1_cancer_genes.txt
# 4. Filter VCF to Tier 1 cancer genes
grep -f tier1_cancer_genes.txt all_genes.txt | \
bcftools view -i "GENE=@-" variants.vcf > cancer_variants.vcf
# Split gene list
split -l 1000 large_gene_list.txt genes_part_
# Process each part
for file in genes_part_*; do
python scripts/query_cosmic_genes.py --gene-list $file --output ${file}.json
done
# Merge results
jq -s 'reduce .[] as $item ({}; . * $item)' genes_part_*.json > merged_results.json