Loading...
Loading...
Build and extend Gemma Gem, an on-device AI browser assistant Chrome extension running Google's Gemma 4 model via WebGPU with no cloud dependencies.
npx skill4agent add aradotso/trending-skills gemma-gem-browser-ai
Skill by ara.so — Daily 2026 Skills collection.
Offscreen Document Service Worker Content Script
(Gemma 4 + Agent Loop) <-> (Message Router) <-> (Chat UI + DOM Tools)
| |
WebGPU inference Screenshot capture
Token streaming JS execution
offscreen/ — @huggingface/transformers
background/ — take_screenshot, run_javascript
content/
agent/ — ModelBackend, ToolExecutor
# Prerequisites: Node.js 18+, pnpm
pnpm install
# Development build (logging active, source maps)
pnpm build
# Production build (errors only, minified)
pnpm build:prod
chrome://extensions
.output/chrome-mv3-dev/
onnx-community/gemma-4-E2B-it-ONNX
onnx-community/gemma-4-E4B-it-ONNX
agent/
// agent/types.ts
/**
 * Abstraction over the text-generation model that the agent loop drives.
 */
export interface ModelBackend {
/**
 * Streams a model response for the given conversation.
 *
 * @param messages Conversation passed to the model.
 * @param tools Tool definitions supplied alongside the conversation.
 * @param options Generation options.
 * @returns An async generator of stream chunks. The agent loop shown in this
 * document consumes chunks whose `type` is "token", "tool_call" or "done".
 */
generate(
messages: ChatMessage[],
tools: ToolDefinition[],
options: GenerateOptions
): AsyncGenerator<StreamChunk>;
}
/**
 * Describes a tool that can be offered to the model.
 */
export interface ToolDefinition {
/** Identifier used to dispatch the call (e.g. "get_page_title"). */
name: string;
/** Natural-language explanation of what the tool does. */
description: string;
/** Schema of the tool's arguments; the example definition in this document uses an object schema. */
parameters: JSONSchema;
}
/**
 * Optional settings accepted by `ModelBackend.generate`.
 */
export interface GenerateOptions {
// NOTE(review): presumably an upper bound on generated tokens — confirm against the backend.
maxNewTokens?: number;
// NOTE(review): presumably toggles the model's "thinking" mode — confirm against the backend.
thinking?: boolean;
}// agent/types.ts
/**
 * Abstraction over whatever actually runs a tool on behalf of the agent loop.
 */
export interface ToolExecutor {
/**
 * Runs the named tool with the given arguments.
 *
 * @param toolName Name of the tool to run.
 * @param args Arguments for the tool, keyed by parameter name.
 * @returns A promise for the tool's result; the result type is not constrained.
 */
execute(toolName: string, args: Record<string, unknown>): Promise<unknown>;
}// agent/loop.ts — simplified illustration
/**
 * Renders a tool result as text for the conversation.
 *
 * Objects and arrays are JSON-encoded so the model sees their contents rather
 * than "[object Object]". Primitives and Error instances keep their
 * `String(...)` form, and values JSON cannot encode fall back to `String(...)`.
 */
function serializeToolResult(result: unknown): string {
  if (typeof result !== "object" || result === null || result instanceof Error) {
    return String(result);
  }
  try {
    return JSON.stringify(result) ?? String(result);
  } catch {
    // Circular structures or BigInt members cannot be JSON-encoded.
    return String(result);
  }
}

/**
 * Drives the model/tool loop for a single user message.
 *
 * Each pass streams one model generation. Tokens are forwarded as "token"
 * events. Every tool call is executed, reported through "tool_start" and
 * "tool_result" events, and its serialized result is appended to the
 * conversation as a "tool" message for the next pass.
 *
 * The loop stops when the model emits a "done" chunk, when a pass finishes
 * without any tool call (the conversation would be unchanged, so another pass
 * would only repeat the same generation), or after `maxIterations` passes.
 *
 * NOTE(review): the assistant's own output is not appended to `messages`, and
 * a "done" chunk that follows a tool call in the same pass ends the loop before
 * the tool result is fed back — confirm both against the real backend.
 *
 * @param userMessage Text of the new user turn.
 * @param history Prior conversation; copied, never mutated.
 * @param model Backend used to generate responses.
 * @param tools Executor used to run tool calls.
 * @param toolDefs Tool definitions offered to the model.
 * @param maxIterations Maximum number of generation passes.
 * @returns An async generator of agent events.
 */
export async function* runAgentLoop(
  userMessage: string,
  history: ChatMessage[],
  model: ModelBackend,
  tools: ToolExecutor,
  toolDefs: ToolDefinition[],
  maxIterations: number
): AsyncGenerator<AgentEvent> {
  const messages = [...history, { role: "user", content: userMessage }];
  for (let i = 0; i < maxIterations; i++) {
    let toolCalled = false;
    for await (const chunk of model.generate(messages, toolDefs, {})) {
      if (chunk.type === "token") yield { type: "token", token: chunk.token };
      if (chunk.type === "tool_call") {
        toolCalled = true;
        yield { type: "tool_start", name: chunk.name };
        const result = await tools.execute(chunk.name, chunk.args);
        yield { type: "tool_result", name: chunk.name, result };
        messages.push({
          role: "tool",
          name: chunk.name,
          content: serializeToolResult(result),
        });
      }
      if (chunk.type === "done") return;
    }
    // Nothing was added to the conversation, so another pass cannot differ.
    if (!toolCalled) return;
  }
}
| Tool | Location | Description |
|---|---|---|
| `read_page_content` | Content script | Read page text/HTML or a CSS selector |
| `take_screenshot` | Service worker | Capture visible tab as PNG |
| `click_element` | Content script | Click by CSS selector |
| `type_text` | Content script | Type into input by CSS selector |
| | Content script | Scroll by pixel amount |
| `run_javascript` | Service worker | Execute JS in page context |
// offscreen/tools/definitions.ts
/**
 * Example tool definition: a tool named "get_page_title" that takes no
 * arguments (empty `properties`, empty `required`).
 */
export const MY_TOOL_DEFINITION: ToolDefinition = {
name: "get_page_title",
description: "Returns the document title of the current page.",
parameters: {
type: "object",
properties: {},
required: [],
},
};// offscreen/tools/index.ts
import { MY_TOOL_DEFINITION } from "./definitions";
/**
 * List of tool definitions; a new tool is registered by appending its
 * definition here.
 */
export const ALL_TOOLS: ToolDefinition[] = [
// ...existing tools
MY_TOOL_DEFINITION,
];// content/tools/executor.ts
/**
 * Reads a required, non-empty string argument from a model-supplied argument
 * bag, throwing a descriptive error when it is missing or of the wrong type.
 */
function requireStringArg(args: Record<string, unknown>, key: string): string {
  const value = args[key];
  if (typeof value !== "string" || value.length === 0) {
    throw new Error(`Missing or invalid "${key}" argument`);
  }
  return value;
}

/**
 * Looks up the first element matching `selector`, converting the exception
 * raised for a malformed selector into a plain Error with a readable message.
 */
function queryContentElement<T extends Element>(selector: string): T | null {
  try {
    return document.querySelector<T>(selector);
  } catch {
    throw new Error(`Invalid CSS selector: ${selector}`);
  }
}

/**
 * Executes a DOM tool inside the page on behalf of the agent.
 *
 * Supported tools:
 * - "get_page_title": returns `document.title`.
 * - "read_page_content": returns the text of the first element matching
 *   `args.selector` ("Not found" when nothing matches), or the whole body's
 *   `innerText` when no selector is given.
 * - "click_element": clicks the first element matching `args.selector`.
 * - "type_text": focuses the matching input, sets its value to `args.text`
 *   and dispatches bubbling "input" and "change" events.
 *
 * Arguments come from the model, so they are validated rather than cast.
 *
 * @param name Tool name.
 * @param args Tool arguments as produced by the model.
 * @returns The tool's result.
 * @throws Error when the tool is unknown, an argument is missing or mistyped,
 * the selector is malformed, or the target element does not exist.
 */
export async function executeContentTool(
  name: string,
  args: Record<string, unknown>
): Promise<unknown> {
  switch (name) {
    case "get_page_title":
      return document.title;
    case "read_page_content": {
      const selector = args.selector;
      // A missing or empty selector means "read the whole page".
      if (!selector) return document.body.innerText;
      if (typeof selector !== "string") {
        throw new Error(`Missing or invalid "selector" argument`);
      }
      return queryContentElement(selector)?.textContent ?? "Not found";
    }
    case "click_element": {
      const selector = requireStringArg(args, "selector");
      const el = queryContentElement<HTMLElement>(selector);
      if (!el) throw new Error(`Element not found: ${selector}`);
      el.click();
      return "clicked";
    }
    case "type_text": {
      const selector = requireStringArg(args, "selector");
      const text = args.text;
      // An empty string is allowed (it clears the field); anything else is rejected.
      if (typeof text !== "string") {
        throw new Error(`Missing or invalid "text" argument`);
      }
      const input = queryContentElement<HTMLInputElement>(selector);
      if (!input) throw new Error(`Input not found: ${selector}`);
      input.focus();
      input.value = text;
      // Notify listeners that react to user edits.
      input.dispatchEvent(new Event("input", { bubbles: true }));
      input.dispatchEvent(new Event("change", { bubbles: true }));
      return "typed";
    }
    default:
      throw new Error(`Unknown content tool: ${name}`);
  }
}
// background/tools.ts
/**
 * Executes a tool that must run from the extension service worker.
 *
 * Supported tools:
 * - "take_screenshot": captures the visible tab as a PNG data URL.
 * - "run_javascript": runs `args.code` as a function body in the given tab and
 *   returns the value it `return`s (null when there is none).
 *
 * The code string is passed to the page as an argument and compiled there, in
 * the MAIN world. Compiling it in the service worker with `new Function` is
 * rejected by the Manifest V3 extension content security policy. In the MAIN
 * world the page's own CSP applies, so pages that forbid eval will still
 * reject the call.
 *
 * SECURITY: `run_javascript` executes model-generated code with the page's
 * privileges. Only expose it where that is acceptable.
 *
 * NOTE(review): a null result from `run_javascript` is indistinguishable from
 * the "not a service-worker tool" sentinel returned by the default branch.
 *
 * @param name Tool name.
 * @param args Tool arguments as produced by the model.
 * @param tabId Tab in which `run_javascript` executes.
 * @returns The tool result, or null when `name` is not a service-worker tool.
 * @throws Error when `run_javascript` is called without a string `code`.
 */
export async function executeSwTool(
  name: string,
  args: Record<string, unknown>,
  tabId: number
): Promise<unknown> {
  switch (name) {
    case "take_screenshot": {
      const dataUrl = await chrome.tabs.captureVisibleTab({ format: "png" });
      return dataUrl;
    }
    case "run_javascript": {
      const code = args.code;
      if (typeof code !== "string") {
        throw new Error('run_javascript requires a string "code" argument');
      }
      const results = await chrome.scripting.executeScript({
        target: { tabId },
        world: "MAIN",
        args: [code],
        // Serialized and run in the page; `source` arrives via `args`.
        func: (source: string): unknown => new Function(source)(),
      });
      return results[0]?.result ?? null;
    }
    default:
      return null; // not a SW tool — forward to content script
  }
}
chrome.runtime.sendMessage// Message types (shared/messages.ts)
/**
 * Messages exchanged between the extension's contexts, discriminated by `type`:
 * a tool call (with its target tab), a tool result, a streamed token, and the
 * agent's completion or failure.
 */
export type ExtMessage =
| { type: "TOOL_CALL"; name: string; args: Record<string, unknown>; tabId: number }
| { type: "TOOL_RESULT"; name: string; result: unknown }
| { type: "TOKEN"; token: string }
| { type: "AGENT_DONE" }
| { type: "AGENT_ERROR"; error: string };
// Offscreen → SW
// Sends a TOOL_CALL for click_element, tagged with the tab it targets.
// The value returned by sendMessage is not awaited or inspected here.
chrome.runtime.sendMessage<ExtMessage>({
type: "TOOL_CALL",
name: "click_element",
args: { selector: "#submit-btn" },
tabId: currentTabId,
});
// SW → Content script
// Sends the same TOOL_CALL payload to the tab identified by tabId.
chrome.tabs.sendMessage<ExtMessage>(tabId, {
type: "TOOL_CALL",
name: "click_element",
args: { selector: "#submit-btn" },
tabId,
});// offscreen/model.ts — loading with transformers.js
import { pipeline, TextGenerationPipeline } from "@huggingface/transformers";
/** Maps each supported model size to its model repository id. */
const MODEL_IDS = {
E2B: "onnx-community/gemma-4-E2B-it-ONNX",
E4B: "onnx-community/gemma-4-E4B-it-ONNX",
} as const;
/** Supported model sizes, derived from the keys of MODEL_IDS ("E2B" | "E4B"). */
export type ModelSize = keyof typeof MODEL_IDS;
/**
 * Loads the text-generation pipeline for the requested model size on WebGPU
 * with "q4f16" weights.
 *
 * Only events that carry a finite numeric `progress` are forwarded. The loader
 * also reports lifecycle events without a progress value, and passing those on
 * would hand `undefined` to a callback typed to receive a number.
 *
 * @param size Which model variant to load.
 * @param onProgress Called with each numeric progress value reported by the loader.
 * @returns The loaded text-generation pipeline.
 */
export async function loadModel(
  size: ModelSize,
  onProgress: (progress: number) => void
): Promise<TextGenerationPipeline> {
  return pipeline("text-generation", MODEL_IDS[size], {
    dtype: "q4f16",
    device: "webgpu",
    progress_callback: (p: { progress?: number }) => {
      if (typeof p.progress === "number" && Number.isFinite(p.progress)) {
        onProgress(p.progress);
      }
    },
  });
}
chrome.storage.sync
/** User-configurable settings, stored under the "settings" key. */
export interface GemmaGemSettings {
  /** Which model variant to load. */
  modelSize: "E2B" | "E4B";
  /** Whether "thinking" is enabled. */
  thinking: boolean;
  /** Iteration limit. NOTE(review): presumably the agent loop's pass limit — confirm. */
  maxIterations: number;
  /** Hostnames on which the extension is disabled. */
  disabledHosts: string[];
}
/** Values used for any setting that has not been stored yet. */
const DEFAULT_SETTINGS: GemmaGemSettings = {
modelSize: "E2B",
thinking: false,
maxIterations: 10,
disabledHosts: [],
};
/**
 * Reads the stored settings and fills any missing field from the defaults.
 *
 * The returned `disabledHosts` is always a fresh array. Without the copy, a
 * caller that mutates the result (e.g. `push`) while nothing is stored would
 * be mutating the shared `DEFAULT_SETTINGS.disabledHosts` array. A stored
 * value that is not an array is replaced with an empty list.
 *
 * @returns The effective settings.
 */
export async function getSettings(): Promise<GemmaGemSettings> {
  const stored = await chrome.storage.sync.get("settings");
  const merged: GemmaGemSettings = { ...DEFAULT_SETTINGS, ...(stored.settings ?? {}) };
  const disabledHosts = Array.isArray(merged.disabledHosts) ? [...merged.disabledHosts] : [];
  return { ...merged, disabledHosts };
}
/**
 * Merges `patch` over the current settings and writes the result back under
 * the "settings" key.
 *
 * @param patch Fields to overwrite; omitted fields keep their current value.
 */
export async function saveSettings(patch: Partial<GemmaGemSettings>): Promise<void> {
  const merged = Object.assign({}, await getSettings(), patch);
  await chrome.storage.sync.set({ settings: merged });
}
// Disable extension on current host
/**
 * Adds the current page's hostname to `disabledHosts`, unless it is already
 * listed, in which case nothing is written.
 */
async function disableOnCurrentSite() {
  const { hostname } = new URL(location.href);
  const { disabledHosts } = await getSettings();
  if (disabledHosts.includes(hostname)) return;
  await saveSettings({ disabledHosts: disabledHosts.concat(hostname) });
}
// content/ui.ts
/**
 * Builds the chat overlay and attaches it to the page.
 *
 * A host `<div id="gemma-gem-host">` gets a closed shadow root holding a
 * `<style>` element (filled from CHAT_STYLES) followed by an empty
 * `<div id="gemma-gem-container">`. The host is then appended to
 * `document.body`.
 *
 * @returns The shadow root that contains the overlay.
 */
export function injectChatOverlay(): ShadowRoot {
  const hostEl = Object.assign(document.createElement("div"), { id: "gemma-gem-host" });
  // The overlay lives in a shadow root so page styles do not leak in.
  const root = hostEl.attachShadow({ mode: "closed" });

  const styleEl = document.createElement("style");
  styleEl.textContent = CHAT_STYLES; // imported CSS string

  const chatBox = Object.assign(document.createElement("div"), { id: "gemma-gem-container" });

  // Styles first, then the chat container.
  root.append(styleEl, chatBox);
  document.body.appendChild(hostEl);
  return root;
}
[Gemma Gem]
# Service worker logs
chrome://extensions → Gemma Gem → "Inspect views: service worker"
# Offscreen document (most useful: model loading, prompts, tool calls)
chrome://extensions → Gemma Gem → "Inspect views: offscreen.html"
# Content script logs
DevTools on any page → Console (filter: [Gemma Gem])
# All extension contexts
chrome://inspect#other
// Fail fast when the browser exposes no WebGPU entry point or no adapter.
if (!navigator.gpu) {
  throw new Error("WebGPU not supported. Use Chrome 113+ with hardware acceleration enabled.");
}
const adapter = await navigator.gpu.requestAdapter();
if (!adapter) throw new Error("No WebGPU adapter found.");

/**
 * Creates the offscreen document unless one already exists.
 */
async function ensureOffscreen() {
  if (await chrome.offscreen.hasDocument()) return;
  await chrome.offscreen.createDocument({
    url: "offscreen.html",
    reasons: [chrome.offscreen.Reason.WORKERS],
    justification: "Run Gemma 4 inference via WebGPU",
  });
}
clear_context
const MAX_HISTORY_TURNS = 20;
/**
 * Keeps only the most recent `MAX_HISTORY_TURNS * 2` messages.
 *
 * @param messages Full conversation.
 * @returns The same array when it is within the limit, otherwise a new array
 * holding its tail.
 */
function trimHistory(messages: ChatMessage[]): ChatMessage[] {
  const limit = MAX_HISTORY_TURNS * 2;
  return messages.length <= limit ? messages : messages.slice(-limit);
}

/**
 * Parses a (possibly still streaming) tool call emitted by the model.
 *
 * Returns null both when `raw` is not yet valid JSON and when the JSON does
 * not have the shape of a tool call. A bare `42`, `null`, an array, or an
 * object without a string `name` used to be returned as if it were valid.
 * A missing `args` is treated as an empty argument object.
 *
 * @param raw Text to parse.
 * @returns The validated tool call, or null.
 */
function safeParseToolCall(raw: string): { name: string; args: Record<string, unknown> } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null; // still streaming
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return null;

  const { name, args } = parsed as { name?: unknown; args?: unknown };
  if (typeof name !== "string" || name.length === 0) return null;
  if (args === undefined) return { name, args: {} };
  if (typeof args !== "object" || args === null || Array.isArray(args)) return null;
  return { name, args: args as Record<string, unknown> };
}

/**
 * `document.querySelector` that returns null instead of throwing when the
 * selector is malformed.
 *
 * @param selector CSS selector, typically produced by the model.
 * @returns The first matching element, or null.
 */
function safeQuerySelector(selector: string): Element | null {
  try {
    return document.querySelector(selector);
  } catch {
    return null; // invalid selector from model
  }
}