azure-storage-blob-py
Compare original and translation side by side
🇺🇸
Original
English
🇨🇳
Translation
Chinese
Azure Blob Storage SDK for Python
Azure Blob Storage SDK for Python
Client library for Azure Blob Storage — object storage for unstructured data.
适用于Azure Blob Storage的客户端库——用于非结构化数据的对象存储。
Installation
安装
bash
pip install azure-storage-blob azure-identity
bash
pip install azure-storage-blob azure-identity
Environment Variables
环境变量
bash
AZURE_STORAGE_ACCOUNT_NAME=<your-storage-account>
bash
AZURE_STORAGE_ACCOUNT_NAME=<your-storage-account>
Or use full URL
Or use full URL
AZURE_STORAGE_ACCOUNT_URL=https://<account>.blob.core.windows.net
AZURE_STORAGE_ACCOUNT_URL=https://<account>.blob.core.windows.net
Authentication
身份验证
python
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
credential = DefaultAzureCredential()
account_url = "https://<account>.blob.core.windows.net"
blob_service_client = BlobServiceClient(account_url, credential=credential)
python
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
credential = DefaultAzureCredential()
account_url = "https://<account>.blob.core.windows.net"
blob_service_client = BlobServiceClient(account_url, credential=credential)
Client Hierarchy
客户端层级结构
| Client | Purpose | Get From |
|---|---|---|
| BlobServiceClient | Account-level operations | Direct instantiation |
| ContainerClient | Container operations | blob_service_client.get_container_client() |
| BlobClient | Single blob operations | blob_service_client.get_blob_client() |
| 客户端 | 用途 | 获取方式 |
|---|---|---|
| BlobServiceClient | 账户级操作 | 直接实例化 |
| ContainerClient | 容器操作 | blob_service_client.get_container_client() |
| BlobClient | 单个Blob操作 | blob_service_client.get_blob_client() |
Core Workflow
核心工作流
Create Container
创建容器
python
container_client = blob_service_client.get_container_client("mycontainer")
container_client.create_container()
python
container_client = blob_service_client.get_container_client("mycontainer")
container_client.create_container()
Upload Blob
上传Blob
python
python
From file path
From file path
blob_client = blob_service_client.get_blob_client(
container="mycontainer",
blob="sample.txt"
)
with open("./local-file.txt", "rb") as data:
blob_client.upload_blob(data, overwrite=True)
blob_client = blob_service_client.get_blob_client(
container="mycontainer",
blob="sample.txt"
)
with open("./local-file.txt", "rb") as data:
blob_client.upload_blob(data, overwrite=True)
From bytes/string
From bytes/string
blob_client.upload_blob(b"Hello, World!", overwrite=True)
blob_client.upload_blob(b"Hello, World!", overwrite=True)
From stream
From stream
import io
stream = io.BytesIO(b"Stream content")
blob_client.upload_blob(stream, overwrite=True)
import io
stream = io.BytesIO(b"Stream content")
blob_client.upload_blob(stream, overwrite=True)
Download Blob
下载Blob
python
blob_client = blob_service_client.get_blob_client(
container="mycontainer",
blob="sample.txt"
)
python
blob_client = blob_service_client.get_blob_client(
container="mycontainer",
blob="sample.txt"
)
To file
To file
with open("./downloaded.txt", "wb") as file:
download_stream = blob_client.download_blob()
file.write(download_stream.readall())
with open("./downloaded.txt", "wb") as file:
download_stream = blob_client.download_blob()
file.write(download_stream.readall())
To memory
To memory
download_stream = blob_client.download_blob()
content = download_stream.readall() # bytes
download_stream = blob_client.download_blob()
content = download_stream.readall() # bytes
Read into existing buffer
Read into existing buffer
stream = io.BytesIO()
num_bytes = blob_client.download_blob().readinto(stream)
stream = io.BytesIO()
num_bytes = blob_client.download_blob().readinto(stream)
List Blobs
列出Blob
python
container_client = blob_service_client.get_container_client("mycontainer")
python
container_client = blob_service_client.get_container_client("mycontainer")
List all blobs
List all blobs
for blob in container_client.list_blobs():
print(f"{blob.name} - {blob.size} bytes")
for blob in container_client.list_blobs():
print(f"{blob.name} - {blob.size} bytes")
List with prefix (folder-like)
List with prefix (folder-like)
for blob in container_client.list_blobs(name_starts_with="logs/"):
print(blob.name)
for blob in container_client.list_blobs(name_starts_with="logs/"):
print(blob.name)
Walk blob hierarchy (virtual directories)
Walk blob hierarchy (virtual directories)
for item in container_client.walk_blobs(delimiter="/"):
if item.get("prefix"):
print(f"Directory: {item['prefix']}")
else:
print(f"Blob: {item.name}")
for item in container_client.walk_blobs(delimiter="/"):
if item.get("prefix"):
print(f"Directory: {item['prefix']}")
else:
print(f"Blob: {item.name}")
Delete Blob
删除Blob
python
blob_client.delete_blob()
python
blob_client.delete_blob()
Delete with snapshots
Delete with snapshots
blob_client.delete_blob(delete_snapshots="include")
blob_client.delete_blob(delete_snapshots="include")
Performance Tuning
性能调优
python
python
Configure chunk sizes for large uploads/downloads
Configure chunk sizes for large uploads/downloads
blob_client = BlobClient(
account_url=account_url,
container_name="mycontainer",
blob_name="large-file.zip",
credential=credential,
max_block_size=4 * 1024 * 1024, # 4 MiB blocks
max_single_put_size=64 * 1024 * 1024 # 64 MiB single upload limit
)
blob_client = BlobClient(
account_url=account_url,
container_name="mycontainer",
blob_name="large-file.zip",
credential=credential,
max_block_size=4 * 1024 * 1024, # 4 MiB blocks
max_single_put_size=64 * 1024 * 1024 # 64 MiB single upload limit
)
Parallel upload
Parallel upload
blob_client.upload_blob(data, max_concurrency=4)
blob_client.upload_blob(data, max_concurrency=4)
Parallel download
Parallel download
download_stream = blob_client.download_blob(max_concurrency=4)
download_stream = blob_client.download_blob(max_concurrency=4)
SAS Tokens
SAS令牌
python
from datetime import datetime, timedelta, timezone
from azure.storage.blob import generate_blob_sas, BlobSasPermissions
sas_token = generate_blob_sas(
account_name="<account>",
container_name="mycontainer",
blob_name="sample.txt",
account_key="<account-key>", # Or use user delegation key
permission=BlobSasPermissions(read=True),
expiry=datetime.now(timezone.utc) + timedelta(hours=1)
)
python
from datetime import datetime, timedelta, timezone
from azure.storage.blob import generate_blob_sas, BlobSasPermissions
sas_token = generate_blob_sas(
account_name="<account>",
container_name="mycontainer",
blob_name="sample.txt",
account_key="<account-key>", # Or use user delegation key
permission=BlobSasPermissions(read=True),
expiry=datetime.now(timezone.utc) + timedelta(hours=1)
)
Use SAS token
Use SAS token
blob_url = f"https://<account>.blob.core.windows.net/mycontainer/sample.txt?{sas_token}"
blob_url = f"https://<account>.blob.core.windows.net/mycontainer/sample.txt?{sas_token}"
Blob Properties and Metadata
Blob属性与元数据
python
python
Get properties
Get properties
properties = blob_client.get_blob_properties()
print(f"Size: {properties.size}")
print(f"Content-Type: {properties.content_settings.content_type}")
print(f"Last modified: {properties.last_modified}")
properties = blob_client.get_blob_properties()
print(f"Size: {properties.size}")
print(f"Content-Type: {properties.content_settings.content_type}")
print(f"Last modified: {properties.last_modified}")
Set metadata
Set metadata
blob_client.set_blob_metadata(metadata={"category": "logs", "year": "2024"})
blob_client.set_blob_metadata(metadata={"category": "logs", "year": "2024"})
Set content type
Set content type
from azure.storage.blob import ContentSettings
blob_client.set_http_headers(
content_settings=ContentSettings(content_type="application/json")
)
from azure.storage.blob import ContentSettings
blob_client.set_http_headers(
content_settings=ContentSettings(content_type="application/json")
)
Async Client
异步客户端
python
from azure.identity.aio import DefaultAzureCredential
from azure.storage.blob.aio import BlobServiceClient
async def upload_async():
credential = DefaultAzureCredential()
async with BlobServiceClient(account_url, credential=credential) as client:
blob_client = client.get_blob_client("mycontainer", "sample.txt")
with open("./file.txt", "rb") as data:
await blob_client.upload_blob(data, overwrite=True)
python
from azure.identity.aio import DefaultAzureCredential
from azure.storage.blob.aio import BlobServiceClient
async def upload_async():
credential = DefaultAzureCredential()
async with BlobServiceClient(account_url, credential=credential) as client:
blob_client = client.get_blob_client("mycontainer", "sample.txt")
with open("./file.txt", "rb") as data:
await blob_client.upload_blob(data, overwrite=True)
Download async
Download async
async def download_async():
async with BlobServiceClient(account_url, credential=credential) as client:
blob_client = client.get_blob_client("mycontainer", "sample.txt")
stream = await blob_client.download_blob()
data = await stream.readall()
async def download_async():
async with BlobServiceClient(account_url, credential=credential) as client:
blob_client = client.get_blob_client("mycontainer", "sample.txt")
stream = await blob_client.download_blob()
data = await stream.readall()
Best Practices
最佳实践
- Use DefaultAzureCredential instead of connection strings
- Use context managers for async clients
- Set overwrite=True explicitly when re-uploading
- Use max_concurrency for large file transfers
- Prefer readinto() over readall() for memory efficiency
- Use walk_blobs() for hierarchical listing
- Set appropriate content types for web-served blobs
- 使用DefaultAzureCredential而非连接字符串
- 对异步客户端使用上下文管理器
- 重新上传时显式设置overwrite=True
- 针对大文件传输使用max_concurrency
- 为了内存效率,优先使用readinto()而非readall()
- 使用walk_blobs()进行分层列出
- 为Web服务的Blob设置合适的内容类型