# Modal — Serverless GPU Compute

Use when "Modal", "serverless GPU", "cloud GPU", "deploy ML model", or asking about "serverless containers", "GPU compute", "batch processing", "scheduled jobs", "autoscaling ML".
npx skill4agent add eyadsibai/ltk modal

# Install
pip install modal

# Authenticate
modal token new
import modal

# Minimal Modal app: one function that runs in a remote container.
app = modal.App("my-app")


@app.function()
def hello():
    """Return a greeting; executes inside a Modal container when called remotely."""
    return "Hello from Modal!"


# Run with: modal run script.py
# Build image with dependencies: a Debian-slim base plus pinned Python
# packages, installed once at image-build time (not on every container start).
image = (
    modal.Image.debian_slim(python_version="3.12")
    .pip_install("torch", "transformers", "numpy")
)

# Every function on this app runs inside the image above by default.
app = modal.App("ml-app", image=image)
@app.function(gpu="H100")
def train_model():
    """Run on a container with a single H100 GPU attached."""
    import torch

    # Sanity check: the container must actually expose a CUDA device.
    assert torch.cuda.is_available()
    # GPU code here


# Available GPUs: T4, L4, A10, A100, L40S, H100, H200, B200
# Multi-GPU: gpu="H100:8"
@app.function()
@modal.web_endpoint(method="POST")
def predict(data: dict):
    """HTTP POST endpoint; `data` is the parsed JSON request body."""
    # NOTE: assumes a `model` object is loaded elsewhere (e.g. at import
    # time or via a container-enter hook) — confirm in the full app.
    result = model.predict(data["input"])
    return {"prediction": result}


# Deploy: modal deploy script.py
@app.function(schedule=modal.Cron("0 2 * * *"))  # Daily at 2 AM (cron syntax)
def daily_backup():
    pass


@app.function(schedule=modal.Period(hours=4))  # Every 4 hours (fixed interval)
def refresh_cache():
    pass
@app.function()
def process_item(item_id: int):
    """Process a single item; fanned out across containers via .map()."""
    return analyze(item_id)


@app.local_entrypoint()
def main():
    items = range(1000)
    # Automatically parallelized across containers
    results = list(process_item.map(items))
# Named, persistent volume shared across containers and runs.
volume = modal.Volume.from_name("my-data", create_if_missing=True)


@app.function(volumes={"/data": volume})
def save_results(data):
    """Write `data` to the volume mounted at /data."""
    with open("/data/results.txt", "w") as f:
        f.write(data)
    volume.commit()  # Persist changes
@app.function(secrets=[modal.Secret.from_name("huggingface")])
def download_model():
    """The named secret's keys are injected as environment variables."""
    import os

    token = os.environ["HF_TOKEN"]
@app.cls(gpu="L40S")
class Model:
    # Class-based function: load the model once per container, then serve
    # many predict() calls without reloading.

    @modal.enter()
    def load_model(self):
        """Runs once when the container starts."""
        from transformers import pipeline

        self.pipe = pipeline("text-classification", device="cuda")

    @modal.method()
    def predict(self, text: str):
        return self.pipe(text)


@app.local_entrypoint()
def main():
    model = Model()
    result = model.predict.remote("Modal is great!")
cpu=8.0, # 8 CPU cores
memory=32768, # 32 GiB RAM
ephemeral_disk=10240, # 10 GiB disk
timeout=3600 # 1 hour timeout
)
def memory_intensive_task():
pass.map()| Platform | Best For |
|---|---|
| Modal | Serverless GPUs, autoscaling, Python-native |
| RunPod | GPU rental, long-running jobs |
| AWS Lambda | CPU workloads, AWS ecosystem |
| Replicate | Model hosting, simple deployments |