# Modal.com Platform Knowledge
Comprehensive Modal.com platform knowledge covering all features, pricing, and best practices
Install: `npx skill4agent add josiahsiegel/claude-plugin-marketplace modal-knowledge`

import modal
app = modal.App("app-name")
@app.function()
def basic_function(arg: str) -> str:
    return f"Result: {arg}"

@app.local_entrypoint()
def main():
    result = basic_function.remote("test")
    print(result)

| Parameter | Type | Description |
|---|---|---|
| image | Image | Container image configuration |
| gpu | str/list | GPU type(s): "T4", "A100", ["H100", "A100"] |
| cpu | float | CPU cores (0.125 to 64) |
| memory | int | Memory in MB (128 to 262144) |
| timeout | int | Max execution seconds |
| retries | int | Retry attempts on failure |
| secrets | list | Secrets to inject |
| volumes | dict | Volume mount points |
| schedule | Cron/Period | Scheduled execution |
| concurrency_limit | int | Max concurrent executions |
| container_idle_timeout | int | Seconds to keep warm |
| include_source | bool | Auto-sync source code |
## GPU Pricing

| GPU | Memory | Use Case | ~Cost/hr |
|---|---|---|---|
| T4 | 16 GB | Small inference | $0.59 |
| L4 | 24 GB | Medium inference | $0.80 |
| A10G | 24 GB | Inference/fine-tuning | $1.10 |
| L40S | 48 GB | Heavy inference | $1.50 |
| A100-40GB | 40 GB | Training | $2.00 |
| A100-80GB | 80 GB | Large models | $3.00 |
| H100 | 80 GB | Cutting-edge | $5.00 |
| H200 | 141 GB | Largest models | $5.00 |
| B200 | 180+ GB | Latest gen | $6.25 |
# Single GPU
@app.function(gpu="A100")
# Specific memory variant
@app.function(gpu="A100-80GB")
# Multi-GPU
@app.function(gpu="H100:4")
# Fallbacks (tries in order)
@app.function(gpu=["H100", "A100", "any"])
# "any" = L4, A10G, or T4
@app.function(gpu="any")

# Debian slim (recommended)
modal.Image.debian_slim(python_version="3.11")
# From Dockerfile
modal.Image.from_dockerfile("./Dockerfile")
# From Docker registry
modal.Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04")

# pip (standard)
image.pip_install("torch", "transformers")
# uv (FASTER - 10-100x)
image.uv_pip_install("torch", "transformers")
# System packages
image.apt_install("ffmpeg", "libsm6")
# Shell commands
image.run_commands("apt-get update", "make install")

# Single file
image.add_local_file("./config.json", "/app/config.json")
# Directory
image.add_local_dir("./models", "/app/models")
# Python source
image.add_local_python_source("my_module")
# Environment variables
image.env({"VAR": "value"})

def download_model():
    from huggingface_hub import snapshot_download
    snapshot_download("model-name")

image.run_function(download_model, secrets=[...])

# Create/reference volume
vol = modal.Volume.from_name("my-vol", create_if_missing=True)
# Mount in function
@app.function(volumes={"/data": vol})
def func():
    # Read/write to /data
    vol.commit()  # Persist changes

# From dashboard (recommended)
modal.Secret.from_name("secret-name")
# From dictionary
modal.Secret.from_dict({"KEY": "value"})
# From local env
modal.Secret.from_local_environ(["KEY1", "KEY2"])
# From .env file
modal.Secret.from_dotenv()
# Usage
@app.function(secrets=[modal.Secret.from_name("api-keys")])
def func():
    import os
    key = os.environ["API_KEY"]

# Distributed dict
d = modal.Dict.from_name("cache", create_if_missing=True)
d["key"] = "value"
d.put("key", "value", ttl=3600)
# Distributed queue
q = modal.Queue.from_name("jobs", create_if_missing=True)
q.put("task")
item = q.get()

@app.function()
@modal.fastapi_endpoint()
def hello(name: str = "World"):
    return {"message": f"Hello, {name}!"}

from fastapi import FastAPI
web_app = FastAPI()
@web_app.post("/predict")
def predict(text: str):
    return {"result": process(text)}

@app.function()
@modal.asgi_app()
def fastapi_app():
    return web_app

from flask import Flask
flask_app = Flask(__name__)
@app.function()
@modal.wsgi_app()
def flask_endpoint():
    return flask_app

@app.function()
@modal.web_server(port=8000)
def custom_server():
    subprocess.run(["python", "-m", "http.server", "8000"])

@modal.asgi_app(custom_domains=["api.example.com"])

# Daily at 8 AM UTC
@app.function(schedule=modal.Cron("0 8 * * *"))
# With timezone
@app.function(schedule=modal.Cron("0 6 * * *", timezone="America/New_York"))

@app.function(schedule=modal.Period(hours=5))
@app.function(schedule=modal.Period(days=1))

Schedules take effect on `modal deploy` (not `modal run`).

# Parallel execution (up to 1000 concurrent)
results = list(func.map(items))

# Unordered (faster)
results = list(func.map(items, order_outputs=False))

# Spread args
pairs = [(1, 2), (3, 4)]
results = list(add.starmap(pairs))

# Async job (returns immediately)
call = func.spawn(data)
result = call.get() # Get result later
# Spawn many
calls = [func.spawn(item) for item in items]
results = [call.get() for call in calls]

@app.cls(gpu="A100", container_idle_timeout=300)
class Server:
    @modal.enter()
    def load(self):
        self.model = load_model()

    @modal.method()
    def predict(self, text):
        return self.model(text)

    @modal.exit()
    def cleanup(self):
        del self.model

@modal.concurrent(max_inputs=100, target_inputs=80)
@modal.method()
def batched(self, item):
    pass

modal run app.py              # Run function
modal serve app.py # Hot-reload dev server
modal shell app.py # Interactive shell
modal shell app.py --gpu A100 # Shell with GPU

modal deploy app.py           # Deploy
modal app list # List apps
modal app logs app-name # View logs
modal app stop app-name       # Stop app

# Volumes
modal volume create name
modal volume list
modal volume put name local remote
modal volume get name remote local
# Secrets
modal secret create name KEY=value
modal secret list
# Environments
modal environment create staging

| Plan | Price | Containers | GPU Concurrency |
|---|---|---|---|
| Starter | Free ($30 credits) | 100 | 10 |
| Team | $250/month | 1000 | 50 |
| Enterprise | Custom | Unlimited | Custom |
Key takeaways: `@modal.enter()`, `uv_pip_install`, `order_outputs=False`, `container_idle_timeout`, `modal run`, `modal deploy`

@app.cls(gpu="A100", container_idle_timeout=300)
class LLM:
    @modal.enter()
    def load(self):
        from vllm import LLM
        self.llm = LLM(model="...")

    @modal.method()
    def generate(self, prompt):
        return self.llm.generate([prompt])

@app.function(volumes={"/data": vol})
def process(file):
    # Process file
    vol.commit()

# Parallel
results = list(process.map(files))

@app.function(
schedule=modal.Cron("0 6 * * *"),
secrets=[modal.Secret.from_name("db")]
)
def daily_etl():
    extract()
    transform()
    load()

| Task | Code |
|---|---|
| Create app | `app = modal.App("app-name")` |
| Basic function | `@app.function()` |
| With GPU | `@app.function(gpu="A100")` |
| With image | `@app.function(image=image)` |
| Web endpoint | `@modal.fastapi_endpoint()` |
| Scheduled | `@app.function(schedule=modal.Cron("0 8 * * *"))` |
| Mount volume | `@app.function(volumes={"/data": vol})` |
| Use secret | `@app.function(secrets=[modal.Secret.from_name("api-keys")])` |
| Parallel map | `results = list(func.map(items))` |
| Async spawn | `call = func.spawn(data)` |
| Class pattern | `@app.cls()` with `@modal.enter()` / `@modal.method()` |