uniprot
Compare original and translation side by side
🇺🇸
Original
English🇨🇳
Translation
Chinese
UniProt Database Access
UniProt 数据库访问
Note: This skill uses the UniProt REST API directly. No Modal deployment needed - all operations run locally via HTTP requests.
注意:本技能直接调用UniProt REST API,无需部署Modal——所有操作均通过HTTP请求在本地运行。
Fetching Sequences
获取序列
By Accession
通过登录号
bash
undefinedbash
undefinedFASTA format
FASTA格式
JSON format with annotations
带注释的JSON格式
Using Python
使用Python
python
import requests
def get_uniprot_sequence(accession):
    """Download the FASTA record for a UniProtKB accession.

    Returns a ``(header, sequence)`` tuple: ``header`` is the first line of
    the stripped response text and ``sequence`` is all remaining lines
    concatenated into one string. Returns ``(None, None)`` when the HTTP
    response is not OK.
    """
    reply = requests.get(f"https://rest.uniprot.org/uniprotkb/{accession}.fasta")
    if not reply.ok:
        return None, None
    # Everything before the first newline is the header; the rest is the
    # sequence body with its line breaks removed.
    header, _, body = reply.text.strip().partition('\n')
    return header, body.replace('\n', '')
import requests
def get_uniprot_sequence(accession):
    """Download the FASTA record for a UniProtKB accession.

    Returns a ``(header, sequence)`` tuple: ``header`` is the first line of
    the stripped response text and ``sequence`` is all remaining lines
    concatenated into one string. Returns ``(None, None)`` when the HTTP
    response is not OK.
    """
    reply = requests.get(f"https://rest.uniprot.org/uniprotkb/{accession}.fasta")
    if not reply.ok:
        return None, None
    # Everything before the first newline is the header; the rest is the
    # sequence body with its line breaks removed.
    header, _, body = reply.text.strip().partition('\n')
    return header, body.replace('\n', '')


# Getting Annotations
获取注释信息
Full Entry
完整条目
python
def get_uniprot_entry(accession):
    """Retrieve the JSON record for a UniProtKB accession.

    Returns the decoded JSON body of the response, or ``None`` when the
    HTTP response is not OK.
    """
    reply = requests.get(f"https://rest.uniprot.org/uniprotkb/{accession}.json")
    if not reply.ok:
        return None
    return reply.json()


# Example: fetch entry P00533 and print its recommended full name.
entry = get_uniprot_entry("P00533")
print(f"Protein: {entry['proteinDescription']['recommendedName']['fullName']['value']}")
def get_uniprot_entry(accession):
    """Retrieve the JSON record for a UniProtKB accession.

    Returns the decoded JSON body of the response, or ``None`` when the
    HTTP response is not OK.
    """
    reply = requests.get(f"https://rest.uniprot.org/uniprotkb/{accession}.json")
    if not reply.ok:
        return None
    return reply.json()


# Example: fetch entry P00533 and print its recommended full name.
entry = get_uniprot_entry("P00533")
print(f"蛋白质: {entry['proteinDescription']['recommendedName']['fullName']['value']}")


# Domain Boundaries
结构域边界
python
def get_domains(accession):
    """Extract domain annotations for a UniProtKB accession.

    Args:
        accession: UniProtKB accession passed to ``get_uniprot_entry``.

    Returns:
        A list of dicts with keys ``name``, ``start`` and ``end``, one for
        each feature whose ``type`` is ``'Domain'``. The list is empty when
        the entry could not be fetched or has no such features.
    """
    entry = get_uniprot_entry(accession)
    if entry is None:
        # get_uniprot_entry returns None for a failed request; calling
        # .get on it would raise AttributeError.
        return []
    return [
        {
            'name': feature.get('description', ''),
            'start': feature['location']['start']['value'],
            'end': feature['location']['end']['value'],
        }
        for feature in entry.get('features', [])
        if feature['type'] == 'Domain'
    ]
def get_domains(accession):
    """Extract domain annotations for a UniProtKB accession.

    Args:
        accession: UniProtKB accession passed to ``get_uniprot_entry``.

    Returns:
        A list of dicts with keys ``name``, ``start`` and ``end``, one for
        each feature whose ``type`` is ``'Domain'``. The list is empty when
        the entry could not be fetched or has no such features.
    """
    entry = get_uniprot_entry(accession)
    if entry is None:
        # get_uniprot_entry returns None for a failed request; calling
        # .get on it would raise AttributeError.
        return []
    return [
        {
            'name': feature.get('description', ''),
            'start': feature['location']['start']['value'],
            'end': feature['location']['end']['value'],
        }
        for feature in entry.get('features', [])
        if feature['type'] == 'Domain'
    ]


# Example: EGFR domains
示例:EGFR结构域
domains = get_domains("P00533")
domains = get_domains("P00533")
[{'name': 'Kinase', 'start': 712, 'end': 979}, ...]
[{'name': 'Kinase', 'start': 712, 'end': 979}, ...]
Searching UniProt
搜索UniProt
By Gene Name
通过基因名称
python
def search_uniprot(query, organism=None, limit=10):
    """Run a UniProtKB search and return the matching entries.

    ``query`` is sent as the search string; when ``organism`` is truthy an
    ``AND organism_id:<organism>`` clause is appended to it. ``limit`` is
    sent as the ``size`` parameter. Returns the ``results`` value of the
    JSON response.
    """
    # Restrict the search to one organism only when a value was supplied.
    search_text = (query + f" AND organism_id:{organism}") if organism else query
    reply = requests.get(
        "https://rest.uniprot.org/uniprotkb/search",
        params={"query": search_text, "format": "json", "size": limit},
    )
    return reply.json()['results']
def search_uniprot(query, organism=None, limit=10):
    """Run a UniProtKB search and return the matching entries.

    ``query`` is sent as the search string; when ``organism`` is truthy an
    ``AND organism_id:<organism>`` clause is appended to it. ``limit`` is
    sent as the ``size`` parameter. Returns the ``results`` value of the
    JSON response.
    """
    # Restrict the search to one organism only when a value was supplied.
    search_text = (query + f" AND organism_id:{organism}") if organism else query
    reply = requests.get(
        "https://rest.uniprot.org/uniprotkb/search",
        params={"query": search_text, "format": "json", "size": limit},
    )
    return reply.json()['results']


# Search for human EGFR
搜索人类EGFR
results = search_uniprot("EGFR", organism=9606)
undefinedresults = search_uniprot("EGFR", organism=9606)
By Sequence Similarity (BLAST)
通过序列相似性(BLAST)
python
undefinedpython
undefinedUse UniProt BLAST
使用UniProt BLAST
Cross-References
交叉引用
Get PDB Structures
获取PDB结构
python
def get_pdb_references(accession):
    """Get the PDB structures cross-referenced by a UniProt entry.

    Args:
        accession: UniProtKB accession passed to ``get_uniprot_entry``.

    Returns:
        A list of dicts with keys ``pdb_id``, ``method`` and ``chains``,
        one per cross-reference whose ``database`` is ``'PDB'``. A property
        that is absent yields ``''``. The list is empty when the entry
        could not be fetched.
    """
    entry = get_uniprot_entry(accession)
    if entry is None:
        # get_uniprot_entry returns None for a failed request.
        return []

    pdbs = []
    for xref in entry.get('uniProtKBCrossReferences', []):
        if xref['database'] != 'PDB':
            continue
        # Look properties up by key instead of by position: indexing [1]
        # raised IndexError whenever fewer than two properties were present
        # (including the one-element default), and it silently picked up
        # whatever property happened to be second.
        # NOTE(review): assumes UniProt labels these properties 'Method'
        # and 'Chains' -- confirm against a live entry.
        props = {
            prop.get('key'): prop.get('value', '')
            for prop in xref.get('properties', [])
        }
        pdbs.append({
            'pdb_id': xref['id'],
            'method': props.get('Method', ''),
            'chains': props.get('Chains', ''),
        })
    return pdbs
def get_pdb_references(accession):
    """Get the PDB structures cross-referenced by a UniProt entry.

    Args:
        accession: UniProtKB accession passed to ``get_uniprot_entry``.

    Returns:
        A list of dicts with keys ``pdb_id``, ``method`` and ``chains``,
        one per cross-reference whose ``database`` is ``'PDB'``. A property
        that is absent yields ``''``. The list is empty when the entry
        could not be fetched.
    """
    entry = get_uniprot_entry(accession)
    if entry is None:
        # get_uniprot_entry returns None for a failed request.
        return []

    pdbs = []
    for xref in entry.get('uniProtKBCrossReferences', []):
        if xref['database'] != 'PDB':
            continue
        # Look properties up by key instead of by position: indexing [1]
        # raised IndexError whenever fewer than two properties were present
        # (including the one-element default), and it silently picked up
        # whatever property happened to be second.
        # NOTE(review): assumes UniProt labels these properties 'Method'
        # and 'Chains' -- confirm against a live entry.
        props = {
            prop.get('key'): prop.get('value', '')
            for prop in xref.get('properties', [])
        }
        pdbs.append({
            'pdb_id': xref['id'],
            'method': props.get('Method', ''),
            'chains': props.get('Chains', ''),
        })
    return pdbs


# Example: PDB structures for EGFR
示例:EGFR的PDB结构
pdbs = get_pdb_references("P00533")
undefinedpdbs = get_pdb_references("P00533")
Common Use Cases
常见使用场景
Target Selection
靶点选择
python
undefinedpython
undefined1. Find protein by name
1. 通过名称查找蛋白质
results = search_uniprot("insulin receptor", organism=9606)
results = search_uniprot("insulin receptor", organism=9606)
2. Get accession
2. 获取登录号
accession = results[0]['primaryAccession'] # e.g., P06213
accession = results[0]['primaryAccession'] # 例如:P06213
3. Get domains
3. 获取结构域
domains = get_domains(accession)
domains = get_domains(accession)
4. Find PDB structure
4. 查找PDB结构
pdbs = get_pdb_references(accession)
pdbs = get_pdb_references(accession)
5. Download best structure for design
5. 下载最优结构用于设计
Sequence Alignment Info
序列比对信息
python
def get_sequence_variants(accession):
    """Get the natural variants annotated on a UniProt entry.

    Args:
        accession: UniProtKB accession passed to ``get_uniprot_entry``.

    Returns:
        A list of dicts with keys ``position``, ``original``, ``variant``
        and ``description``, one per feature whose ``type`` is
        ``'Natural variant'``. ``original`` and ``variant`` are ``''`` when
        the feature carries no such data. The list is empty when the entry
        could not be fetched.
    """
    entry = get_uniprot_entry(accession)
    if entry is None:
        # get_uniprot_entry returns None for a failed request.
        return []

    variants = []
    for feature in entry.get('features', []):
        if feature['type'] != 'Natural variant':
            continue
        alt = feature.get('alternativeSequence', {})
        # `or ['']` covers both a missing key and an empty list; indexing
        # [0] on an empty list would raise IndexError.
        replacements = alt.get('alternativeSequences') or ['']
        variants.append({
            'position': feature['location']['start']['value'],
            'original': alt.get('originalSequence', ''),
            'variant': replacements[0],
            'description': feature.get('description', ''),
        })
    return variants
def get_sequence_variants(accession):
    """Get the natural variants annotated on a UniProt entry.

    Args:
        accession: UniProtKB accession passed to ``get_uniprot_entry``.

    Returns:
        A list of dicts with keys ``position``, ``original``, ``variant``
        and ``description``, one per feature whose ``type`` is
        ``'Natural variant'``. ``original`` and ``variant`` are ``''`` when
        the feature carries no such data. The list is empty when the entry
        could not be fetched.
    """
    entry = get_uniprot_entry(accession)
    if entry is None:
        # get_uniprot_entry returns None for a failed request.
        return []

    variants = []
    for feature in entry.get('features', []):
        if feature['type'] != 'Natural variant':
            continue
        alt = feature.get('alternativeSequence', {})
        # `or ['']` covers both a missing key and an empty list; indexing
        # [0] on an empty list would raise IndexError.
        replacements = alt.get('alternativeSequences') or ['']
        variants.append({
            'position': feature['location']['start']['value'],
            'original': alt.get('originalSequence', ''),
            'variant': replacements[0],
            'description': feature.get('description', ''),
        })
    return variants


# API Reference
API参考
| Endpoint | Description |
|---|---|
| `/uniprotkb/{id}.fasta` | FASTA sequence |
| `/uniprotkb/{id}.json` | Full entry JSON |
| `/uniprotkb/search` | Search entries |
| `/uniprotkb/stream` | Batch download |
| 端点 | 描述 |
|---|---|
| `/uniprotkb/{id}.fasta` | FASTA格式序列 |
| `/uniprotkb/{id}.json` | 完整条目JSON格式 |
| `/uniprotkb/search` | 条目搜索 |
| `/uniprotkb/stream` | 批量下载 |
Troubleshooting
故障排除
Entry not found: Check accession format (e.g., P00533)
Rate limits: Add delay between requests
Large downloads: Use stream endpoint with pagination
Next: Use the sequence with esm for embeddings or colabfold for structure.
未找到条目:检查登录号格式(例如:P00533)
速率限制:在请求之间添加延迟
大文件下载:使用stream端点并分页
下一步:将序列与 esm 结合用于嵌入,或与 colabfold 结合用于结构分析。