# Modal
Run Python code in the cloud with serverless containers, GPUs, and autoscaling. Use when deploying ML models, running batch processing jobs, scheduling compute-intensive tasks, or serving APIs that require GPU acceleration or dynamic scaling.
## Setup

```bash
# Install the skill
npx skill4agent add ovachiever/droid-tings modal

# Install Modal
uv pip install modal

# Authenticate (opens browser for login; the token is saved to ~/.modal.toml)
modal token new
```

Verify the setup with a minimal app:

```python
import modal

app = modal.App("test-app")

@app.function()
def hello():
    print("Modal is working!")
```

```bash
modal run script.py
```
## Images

Define container dependencies in code:

```python
import modal

# Basic image with Python packages
image = (
    modal.Image.debian_slim(python_version="3.12")
    .uv_pip_install("torch", "transformers", "numpy")
)

app = modal.App("ml-app", image=image)
```

Image builders chain:

- `.uv_pip_install("pandas", "scikit-learn")` adds Python packages
- `.apt_install("ffmpeg", "git")` adds system packages
- `modal.Image.from_registry("nvidia/cuda:12.1.0-base")` starts from a custom base image
- `.add_local_python_source("my_module")` bundles local Python source

See `references/images.md`.
## Functions

Decorate any function with `@app.function()` to run it in the cloud:

```python
@app.function()
def process_data(file_path: str):
    import pandas as pd
    df = pd.read_csv(file_path)
    return df.describe()

# From local entrypoint
@app.local_entrypoint()
def main():
    result = process_data.remote("data.csv")
    print(result)
```

```bash
modal run script.py
```

See `references/functions.md`.
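During development, the same function can also run in-process instead of in a container; a sketch reusing `process_data` from above:

```python
@app.local_entrypoint()
def debug():
    # .local() executes in the current process; .remote() runs in a Modal container
    result = process_data.local("data.csv")
    print(result)
```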
## GPUs

Attach a GPU with the `gpu` parameter:

```python
@app.function(gpu="H100")
def train_model():
    import torch
    assert torch.cuda.is_available()
    # GPU-accelerated code here
```

Available GPU types: `T4`, `L4`, `A10`, `A100`, `A100-80GB`, `L40S`, `H100`, `H200`, `B200`.

Request multiple GPUs with a count suffix:

```python
@app.function(gpu="H100:8")  # 8x H100 GPUs
def train_large_model():
    pass
```

See `references/gpu.md`.
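Modal also accepts a list of GPU types as fallbacks when the first choice is scarce; a sketch, with the list syntax taken from Modal's GPU docs:

```python
# Prefer H100, fall back to A100 if none are available
@app.function(gpu=["H100", "A100"])
def flexible_train():
    pass
```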
## CPU, memory, and disk

```python
@app.function(
    cpu=8.0,               # 8 physical cores
    memory=32768,          # 32 GiB RAM
    ephemeral_disk=10240   # 10 GiB disk
)
def memory_intensive_task():
    pass
```

See `references/resources.md`.
## Scaling

Fan a function out over many inputs with `.map()`:

```python
@app.function()
def analyze_sample(sample_id: int):
    # Process a single sample (placeholder computation)
    return sample_id ** 2

@app.local_entrypoint()
def main():
    sample_ids = range(1000)
    # Automatically parallelized across containers
    results = list(analyze_sample.map(sample_ids))
```
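By default `.map()` raises on the first failing input; the `return_exceptions` flag (hedged: per Modal's current docs) yields exceptions as values instead, so a few bad samples don't abort the run:

```python
results = list(analyze_sample.map(sample_ids, return_exceptions=True))
failures = [r for r in results if isinstance(r, Exception)]
```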
Autoscaling limits are configured per function:

```python
@app.function(
    max_containers=100,   # Upper limit
    min_containers=2,     # Keep warm
    buffer_containers=5   # Idle buffer for bursts
)
def inference():
    pass
```

See `references/scaling.md`.
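For fire-and-forget work there is also `Function.spawn()`, which returns a handle immediately instead of blocking (a sketch; `long_job` is a hypothetical function):

```python
@app.function()
def long_job(x: int):
    return x * 2

@app.local_entrypoint()
def main():
    call = long_job.spawn(21)  # returns a FunctionCall handle immediately
    print(call.get())          # block until the result is ready
```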
## Volumes

Persist data across runs with a `modal.Volume`:

```python
volume = modal.Volume.from_name("my-data", create_if_missing=True)

@app.function(volumes={"/data": volume})
def save_results(data):
    with open("/data/results.txt", "w") as f:
        f.write(data)
    volume.commit()  # Persist changes
```

See `references/volumes.md`.
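Other containers see committed writes only after a `reload()`; a sketch of reading the file back, reusing the volume and path above:

```python
@app.function(volumes={"/data": volume})
def read_results():
    volume.reload()  # fetch the latest committed changes
    with open("/data/results.txt") as f:
        return f.read()
```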
## Secrets

```python
@app.function(secrets=[modal.Secret.from_name("huggingface")])
def download_model():
    import os
    token = os.environ["HF_TOKEN"]
    # Use token for authentication
```

Create secrets from the CLI:

```bash
modal secret create my-secret KEY=value API_TOKEN=xyz
```

See `references/secrets.md`.
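Secrets can also be constructed inline from a dict, which is handy in tests (a sketch using `Secret.from_dict`; the token value is a placeholder):

```python
@app.function(secrets=[modal.Secret.from_dict({"API_TOKEN": "placeholder"})])
def use_inline_secret():
    import os
    print(os.environ["API_TOKEN"])
```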
## Web endpoints

Expose a function over HTTP with `@modal.web_endpoint()`:

```python
@app.function()
@modal.web_endpoint(method="POST")
def predict(data: dict):
    # Process request (`model` is assumed loaded elsewhere, e.g. at container start)
    result = model.predict(data["input"])
    return {"prediction": result}
```

```bash
modal deploy script.py
```

See `references/web-endpoints.md`.
## Scheduled jobs

```python
@app.function(schedule=modal.Cron("0 2 * * *"))  # Daily at 2 AM
def daily_backup():
    # Backup data
    pass

@app.function(schedule=modal.Period(hours=4))  # Every 4 hours
def refresh_cache():
    # Update cache
    pass
```

See `references/scheduled-jobs.md`.
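Schedules fire server-side only while the app is deployed, so a one-time deploy is required:

```bash
modal deploy script.py
```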
## Example: model inference

```python
import modal

# Define dependencies
image = modal.Image.debian_slim().uv_pip_install("torch", "transformers")
app = modal.App("llm-inference", image=image)

# Pre-download model weights (run once, e.g. `modal run script.py::download_model`)
@app.function()
def download_model():
    from transformers import AutoModel
    AutoModel.from_pretrained("bert-base-uncased")

# Serve model
@app.cls(gpu="L40S")
class Model:
    @modal.enter()
    def load_model(self):
        from transformers import pipeline
        self.pipe = pipeline("text-classification", device="cuda")

    @modal.method()
    def predict(self, text: str):
        return self.pipe(text)

@app.local_entrypoint()
def main():
    model = Model()
    result = model.predict.remote("Modal is great!")
    print(result)
```
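Once deployed, other processes can look the class up by name; a sketch assuming the `modal.Cls.from_name` lookup API:

```python
import modal

# Look up the deployed class by app name and class name
Model = modal.Cls.from_name("llm-inference", "Model")
print(Model().predict.remote("Called from anywhere"))
```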
## Example: batch processing

```python
@app.function(cpu=2.0, memory=4096)
def process_file(file_path: str):
    import pandas as pd
    df = pd.read_csv(file_path)
    # Process data
    return df.shape[0]

@app.local_entrypoint()
def main():
    files = ["file1.csv", "file2.csv", ...]  # 1000s of files
    # Automatically parallelized across containers
    for count in process_file.map(files):
        print(f"Processed {count} rows")
```
gpu="A100:2", # 2x A100 GPUs
timeout=3600 # 1 hour timeout
)
def train_model(config: dict):
import torch
# Multi-GPU training code
model = create_model(config)
train(model)
return metricsreferences/getting-started.mdreferences/images.mdreferences/functions.mdreferences/gpu.mdreferences/resources.mdreferences/scaling.mdreferences/volumes.mdreferences/secrets.mdreferences/web-endpoints.mdreferences/scheduled-jobs.mdreferences/examples.md.uv_pip_install()max_containersmin_containers.map().uv_pip_install("package-name")@app.function(gpu="A100")torch.cuda.is_available()@app.function(timeout=3600)volume.commit()