Loading...
Loading...
Protein Dynamics, Evolution, and Structure analysis. Specialized in Normal Mode Analysis (NMA) using Anisotropic (ANM) and Gaussian Network Models (GNM). Features tools for structural ensemble analysis, PCA, and co-evolutionary analysis (Evol). Use for protein flexibility prediction, collective motions, structural ensemble comparison, hinge region identification, binding site analysis, MD trajectory filtering, and evolutionary analysis.
npx skill4agent add tondevrel/scientific-agent-skills prodyprody.parsePDBprody.ANMprody.GNMprody.selectprody.Ensemble'protein and resname TRP and within 5 of resname HEM'pip install prodyimport numpy as np
from prody import *
# Optional: for plotting
# confProDy(auto_show=False)from prody import *
# 1. Parse structure
atoms = parsePDB('1p38')
calphas = atoms.select('protein and calpha')
# 2. Build and solve ANM
anm = ANM('p38_anm')
anm.buildHessian(calphas)
anm.calcModes(n_modes=20)
# 3. Analyze results
for mode in anm[:3]:
print(f"Mode {mode.getIndex()}: Variance = {mode.getVariance():.2f}")
# 4. Save for visualization (NMD format for VMD/PyMOL)
writeNMD('p38_modes.nmd', anm, calphas)ensemble.iterpose()matchAlign()from prody import *
# ❌ BAD: Iterating over atoms to find distance
# for a1 in atoms:
# for a2 in atoms: ... # O(N^2) Python loop
# ✅ GOOD: Use selection algebra
nearby = atoms.select('within 5 of resname LIG')
# ❌ BAD: PCA on unaligned frames
# pca = PCA('test'); pca.buildCovariance(coord_array) # Wrong!
# ✅ GOOD: Create Ensemble and interpose
ens = Ensemble(atoms)
ens.addCoordset(trajectory)
ens.iterpose() # Crucial step
pca = PCA('test')
pca.buildCovariance(ens)
# ❌ BAD: Using NMA modes 0-5 for biology
# slow_mode = anm[0] # This is just a translation/rotationatoms = parsePDB('3hhr')
# Chain and residue range
heavy_chain = atoms.select('chain H and resnum 1 to 120')
# Chemical properties
backbone = atoms.select('backbone')
hydrophobic = atoms.select('resname ALA VAL ILE LEU MET PHE TYR TRP')
# Proximity (Binding site)
site = atoms.select('protein and within 10 of resname ATP')
# Geometric center
center = calcCenter(site)gnm = GNM('1p38_gnm')
gnm.buildKirchhoff(calphas, cutoff=10.0)
gnm.calcModes()
# Cross-correlations (How atoms move together)
cross_corr = calcCrossCorr(gnm)
# Square fluctuations (Theoretical B-factors)
sq_flucts = calcSqFlucts(gnm)anm = ANM('1p38_anm')
anm.buildHessian(calphas, cutoff=15.0)
anm.calcModes()
# Getting the hinge regions (where motion changes direction)
hinges = findHinges(anm[0]) # From the slowest mode# Parse multiple structures
pdb_ids = ['1p38', '1zz2', '1ywr']
structures = [parsePDB(pid) for pid in pdb_ids]
# Align and match
ensemble = Ensemble('p38_set')
for s in structures:
# Match calphas of s to the reference first structure
mappings = matchAlign(s, structures[0])
ensemble.addCoordset(mappings[0][0]) # Add matched coords
# PCA
pca = PCA('p38_pca')
pca.buildCovariance(ensemble)
pca.calcModes()
# Project structures onto PCs
projection = ensemble.getProjection(pca[:2])# Load Multiple Sequence Alignment (MSA)
msa = parseMSA('p38_alignment.fasta')
# Calculate conservation (Shannon Entropy)
entropy = calcShannonEntropy(msa)
# Mutual Information (Co-evolution)
mi = calcMutualInformation(msa)
# Direct Coupling Analysis (DCA) - requires external tools or specific plugins
# dca = calcDirectCovariance(msa)def get_protein_hinges(pdb_id):
atoms = parsePDB(pdb_id)
calphas = atoms.select('protein and calpha')
anm = ANM(pdb_id)
anm.buildHessian(calphas)
anm.calcModes()
# Hinge residues for the first two functional modes
hinges_m1 = findHinges(anm[0])
hinges_m2 = findHinges(anm[1])
return list(set(hinges_m1) | set(hinges_m2))def compare_md_to_anm(md_traj, p_pdb):
# 1. ANM from static structure
atoms = parsePDB(p_pdb)
anm = ANM('static')
anm.buildHessian(atoms.select('calpha'))
anm.calcModes()
# 2. PCA from MD
ens = Ensemble('md')
ens.setCoords(atoms)
ens.addCoordset(md_traj)
ens.iterpose()
pca = PCA('md_pca')
pca.buildCovariance(ens)
pca.calcModes()
# 3. Overlap (Inner product of modes)
overlap = calcOverlap(anm[0], pca[0])
return overlap# Note: Full druggability analysis usually involves 'hotspot' calculations
def binding_site_flexibility(atoms, lig_resname):
site = atoms.select(f'protein and within 8 of resname {lig_resname}')
# Calculate GNM just for the site context
gnm = GNM('site')
gnm.buildKirchhoff(atoms.select('calpha'))
gnm.calcModes()
# High fluctuations = likely flexible binding site
flucts = calcSqFlucts(gnm)
return flucts[site.getIndices()]parseMSA# ❌ Problem: ensemble.addCoordset(pdb2) fails due to different atom counts
# ✅ Solution: Use matchAlign
matches = matchAlign(pdb2, pdb1)
if matches:
ensemble.addCoordset(matches[0][0])# ✅ Solution: Use specific residue names or 'all'
ligand = atoms.select('resname MYL')# ✅ Solution: Check connectivity
if not atoms.select('protein').connected:
print("Warning: Disconnected components found!")iterpose()matchAlign()