Loading...
Loading...
Deploy, operate, and integrate the VSS 3.2 GA RT-Embed Video Embedding microservice. Covers Docker Compose bring-up, GPU and storage prerequisites, the `/v1` REST API (file uploads, text and video embeddings, live RTSP streams, health and metrics), Redis/Kafka/OTel integration, common failure modes, and teardown.
npx skill4agent add nvidia/skills vss-deploy-video-embedding
`vss-deploy-video-embedding`, `RT-Embed`, `rtvi-embed`, `video embedding service`, `Cosmos-Embed1`, `embed live stream`, `embed video file`, `generate video embeddings`, `text embedding for video search`, `vss-deploy-video-embedding`, `rtvi-embed`, `vss-rtvi-embed`, `nvcr.io/nvstaging/vss-core/vss-rt-embed`, `RTVI_EMBED_IMAGE`, `3.2.0-26.05.4`, `RTVI_EMBED_TAG`, `bp_developer_search_2d`, `8000`, `${RTVI_EMBED_PORT}`, `cosmos-embed1-448p`, `nvidia/Cosmos-Embed1-448p`, `GET /v1/ready`, `1200s`, `nvidia`, `${VAR:+value}`, `docker login nvcr.io`, `$oauthtoken`, `RTVI_EMBED_PORT`, `VSS_DATA_DIR`, `NGC_API_KEY`, `HF_TOKEN`, `rtvi-hf-cache`, `rtvi-ngc-model-cache`, `rtvi-triton-model-repo`, `references/deploy-vss-deploy-video-embedding.md`, `references/environment.md`, `cd "{{repo_root}}/deploy/docker/services/rtvi/rtvi-embed"`, `/vss-deploy-profile`, `scripts/dev-profile.sh`, `docker compose up`
export RTVI_EMBED_PORT=8017
export VSS_DATA_DIR="${VSS_DATA_DIR:-$(pwd)/.standalone-data}"
export NGC_API_KEY="<your-ngc-api-key>"
export HOST_IP="$(hostname -I | awk '{print $1}')"
export HF_TOKEN="${HF_TOKEN:-}" # optional, but recommended to avoid HF 429s
mkdir -p "${VSS_DATA_DIR}/data_log/vst/clip_storage"
export RTVI_EMBED_KAFKA_ENABLED=false
export ENABLE_REDIS_ERROR_MESSAGES=false
`/data_log/vst/clip_storage`, `VSS_DATA_DIR`
# Bring up the service under the required Compose profile.
docker compose -f rtvi-embed-docker-compose.yml \
--profile bp_developer_search_2d up -d rtvi-embed
# Watch logs while the model downloads and Triton repo builds.
docker compose -f rtvi-embed-docker-compose.yml logs -f rtvi-embed
`start_period: 1200s`
BASE_URL="http://localhost:${RTVI_EMBED_PORT}"
curl -fsS "$BASE_URL/v1/ready" # 200 when warm.
curl -fsS "$BASE_URL/v1/ready?detailed=true" # Component-level status.
curl -fsS "$BASE_URL/v1/version"
MODELS_JSON=$(curl -fsS "$BASE_URL/v1/models")
echo "$MODELS_JSON" # Confirms cosmos-embed1-448p is loaded.
MODEL_ID="$(echo "$MODELS_JSON" | jq -r '.data[0].id // empty')"
test -n "$MODEL_ID" || { echo "ERROR: /v1/models has no model id — wait until /v1/ready is 200" >&2; exit 1; }
`$BASE_URL`, `$MODEL_ID`
FILE_ID=$(curl -fsS -X POST "$BASE_URL/v1/files" \
-F purpose=vision \
-F media_type=video \
-F file=@/path/to/clip.mp4 | jq -r .id)
curl -fsS -X POST "$BASE_URL/v1/generate_video_embeddings" \
-H "Content-Type: application/json" \
-d "{
\"id\": \"$FILE_ID\",
\"model\": \"$MODEL_ID\",
\"chunk_duration\": 60,
\"chunk_overlap_duration\": 10
}"
curl -fsS -X POST "$BASE_URL/v1/generate_text_embeddings" \
-H "Content-Type: application/json" \
-d "{\"text_input\":\"a forklift moving pallets\",\"model\":\"${MODEL_ID}\"}"
`stream: true`, `chunk_duration > 0`, `400 BadParameters: "Only streaming output is supported for live-streams"`, `chunk_duration: 0`, `streams/add`, `400 BadParameter: "chunk_duration must be greater than 0"`, `POST /v1/streams/add`, `liveStreamUrl`, `stream_id`, `GET /v1/streams/get-stream-info`
STREAM_ID=$(curl -fsS -X POST "$BASE_URL/v1/streams/add" \
-H "Content-Type: application/json" \
-d '{"streams":[{"liveStreamUrl":"rtsp://host:port/live/video","description":"camera-001"}]}' \
| jq -r '.results[0].id')
curl -N -X POST "$BASE_URL/v1/generate_video_embeddings" \
-H "Content-Type: application/json" \
-H "Accept: text/event-stream" \
-d "{
\"id\": \"$STREAM_ID\",
\"model\": \"$MODEL_ID\",
\"stream\": true,
\"chunk_duration\": 10,
\"chunk_overlap_duration\": 2
}"
# List registered live streams (use this to recover stream_ids across sessions).
curl -fsS "$BASE_URL/v1/streams/get-stream-info"
# Stop embedding for the stream when done (terminates SSE with data: [DONE]).
curl -fsS -X DELETE "$BASE_URL/v1/generate_video_embeddings/$STREAM_ID"
`references/rest-api.md`
docker compose -f rtvi-embed-docker-compose.yml ps
docker compose -f rtvi-embed-docker-compose.yml logs -f rtvi-embed
docker stats vss-rtvi-embed
curl -fsS "$BASE_URL/v1/metrics" # Prometheus.
curl -fsS "$BASE_URL/v1/assets/stats" # Asset storage counts and TTL.
`RTVI_EMBED_LOG_DIR`, `/opt/nvidia/rtvi/log/rtvi/`, `:${RTVI_EMBED_PORT}`, `POST /v1/files`, `POST /v1/generate_text_embeddings`, `POST /v1/generate_video_embeddings`, `RTVI_EMBED_KAFKA_TOPIC`, `KAFKA_TOPIC`, `RTVI_EMBED_ERROR_MESSAGE_TOPIC`, `ERROR_MESSAGE_TOPIC`, `RTVI_EMBED_KAFKA_ENABLED=true`, `KAFKA_ENABLED`, `ENABLE_REDIS_ERROR_MESSAGES=true`, `RTVI_EMBED_KAFKA_ENABLED=true`, `KAFKA_ENABLED`, `RTVI_EMBED_ENABLE_OTEL_MONITORING=true`, `ENABLE_OTEL_MONITORING`, `references/integrate-vss-deploy-video-embedding.md`, `references/troubleshooting.md`, `/v1/ready`, `NGC_API_KEY`, `HF_TOKEN`, `start_period: 1200s`, `chown -R 1001:1001`, `RTVI_EMBED_IMAGE`, `RTVI_EMBED_TAG`, `docker compose -f rtvi-embed-docker-compose.yml pull rtvi-embed`, `docker compose -f rtvi-embed-docker-compose.yml --profile bp_developer_search_2d up -d rtvi-embed`, `/v1/ready`, `RTVI_EMBED_TAG`
# Preserve caches (named volumes survive).
docker compose -f rtvi-embed-docker-compose.yml down
# WARNING: removes rtvi-hf-cache, rtvi-ngc-model-cache, rtvi-triton-model-repo.
# Next start will re-download the model and rebuild the Triton repo (20+ min).
docker compose -f rtvi-embed-docker-compose.yml down -v

| File | When to read |
|---|---|
| references/README.md | Table of contents for all reference files. |
| references/deploy-vss-deploy-video-embedding.md | Build Vision Agent deployment reference: image, GPU, storage, startup, prerequisites, known issues. |
| references/integrate-vss-deploy-video-embedding.md | Build Vision Agent integration reference: peers, inputs/outputs, env vars, network, example Compose snippet. |
| references/rest-api.md | Full REST endpoint catalog with worked examples. |
| references/environment.md | Complete environment-variable matrix, including host-to-container renames and secret-sensitive variables. |
| references/troubleshooting.md | Operational diagnostics for startup, model/cache, runtime, and observability issues. |