# doppler-debug skill
# Diagnose inference regressions with Doppler's shared browser/Node command contract, runtime presets, and report artifacts. (project)
# Install:
#   npx skill4agent add clocksmith/doppler
#
# Related skills: doppler-debug, doppler-bench
# Reference docs (presumably bundled with the skill — confirm paths in the repo):
#   docs/style/general-style-guide.md
#   docs/style/javascript-style-guide.md
#   docs/style/config-style-guide.md
#   docs/style/command-interface-design-guide.md
#   docs/style/harness-style-guide.md
#   docs/developer-guides/README.md
#   docs/developer-guides/07-manifest-runtime-field.md
#   docs/developer-guides/12-command-surface.md
#   docs/developer-guides/11-wgsl-kernel.md
#   docs/developer-guides/13-attention-variant.md
#   docs/developer-guides/15-kvcache-layout.md
#   docs/developer-guides/composite-model-family.md
#   docs/developer-guides/composite-pipeline-family.md
#   runtime

# Primary debug run (auto surface = node-first transport; browser fallback only when node transport is unavailable)
# NOTE: replace MODEL_ID with the target model identifier; --json requests structured output.
npm run debug -- --config '{"request":{"modelId":"MODEL_ID","runtimePreset":"modes/debug"},"run":{"surface":"auto"}}' --json
# Verify pass/fail with inference suite
npm run verify:model -- --config '{"request":{"suite":"inference","modelId":"MODEL_ID","runtimePreset":"modes/debug"},"run":{"surface":"auto"}}' --json
# Force browser relay for mobile/WebGPU parity checks
npm run debug -- --config '{"request":{"modelId":"MODEL_ID","runtimePreset":"diagnostics/debug-logits"},"run":{"surface":"browser","browser":{"channel":"chrome","console":true}}}' --json

# Direct runtime-config override (trace enabled for attn/ffn categories, short decode)
npm run debug -- \
--config '{"request":{"modelId":"MODEL_ID"},"run":{"surface":"auto"}}' \
--runtime-config '{"shared":{"tooling":{"intent":"investigate"},"debug":{"trace":{"enabled":true,"categories":["attn","ffn"],"maxDecodeSteps":2}}},"inference":{"batching":{"maxTokens":8},"sampling":{"temperature":0}}}' \
--json

# Investigate-mode profile run (trace/profiler enabled by preset)
# NOTE: replace MODEL_ID with the target model identifier before running.
npm run debug -- --config '{"request":{"modelId":"MODEL_ID","runtimePreset":"experiments/gemma3-profile"},"run":{"surface":"auto"}}' --json
# Fast readback sensitivity checks
# (the -r1 / -r8 preset suffixes presumably select different readback intervals — confirm against the preset files)
npm run bench -- --config '{"request":{"modelId":"MODEL_ID","runtimePreset":"experiments/gemma3-investigate-readback-r1","cacheMode":"warm"},"run":{"surface":"browser"}}' --json
npm run bench -- --config '{"request":{"modelId":"MODEL_ID","runtimePreset":"experiments/gemma3-investigate-readback-r8","cacheMode":"warm"},"run":{"surface":"browser"}}' --json
# Direct override for decode cadence tuning
# (multi-line command: continuation runs through the final --json flag)
npm run bench -- \
--config '{"request":{"modelId":"MODEL_ID","cacheMode":"warm"},"run":{"surface":"browser"}}' \
--runtime-config '{"shared":{"tooling":{"intent":"investigate"}},"inference":{"batching":{"batchSize":4,"readbackInterval":4,"stopCheckMode":"per-token","maxTokens":128},"sampling":{"temperature":0}}}' \
--json

# Intent values referenced by the runtime config:
#   runtime.shared.tooling.intent="calibrate"
#   runtime.shared.tooling.intent="investigate"

# Cold browser run (wipe OPFS cache before launch)
npm run debug -- --config '{"request":{"modelId":"MODEL_ID","cacheMode":"cold"},"run":{"surface":"browser"}}' --json
# Warm browser run (reuse OPFS cache)
npm run debug -- --config '{"request":{"modelId":"MODEL_ID","cacheMode":"warm"},"run":{"surface":"browser"}}' --json

# Key fields in the --json report:
#   result.metrics.modelLoadMs
#   result.metrics.firstTokenMs          (presumably the TTFT metric — confirm)
#   result.metrics.prefillTokensPerSec
#   result.metrics.decodeTokensPerSec
#   result.metrics.gpu
#   result.memoryStats
#   result.deviceInfo
#   result.reportInfo
# Implementation entry points:
#   tools/doppler-cli.js
#   src/tooling/command-api.js
#   src/tooling/node-command-runner.js
#   src/tooling/node-browser-command-runner.js
#   src/inference/browser-harness.js
#   src/config/presets/runtime/modes/debug.json
#   docs/developer-guides/README.md
# Related skills: doppler-bench, doppler-convert