Loading...
Loading...
Manage and query Agent Platform RAG Engine Corpora and retrieve grounded contexts using the Google GenAI SDK. Use when listing RAG corpora or files, inspecting a corpus, retrieving contexts, or generating content grounded in a RAG corpus. Do not use for standard database queries (use SQL/Spanner skills), Google Workspace RAG, or other RAG products like gRAG.
npx skill4agent add google/skills agent-platform-rag-engine-management
vertexai
gcloud auth login
gcloud auth application-default login
python3 -m venv ~/rag_agent_venv
source ~/rag_agent_venv/bin/activate
pip install google-cloud-aiplatform google-genai

> [!TIP]
> Placeholder Parameter Replacement: The Python scripts below use bracketed string placeholders (like `"{project_id}"`, `"{region}"`, and `"{corpus_id}"`). You MUST dynamically replace these placeholders with the actual Project ID, Region, and Corpus ID values provided in the user's prompt (or active context) before generating, providing, or executing the scripts.
import vertexai
from vertexai.preview import rag

# Example: enumerate the RAG corpora of a project, either all at once or
# one page at a time.
vertexai.init(project="{project_id}", location="{region}")

# Approach A: List ALL (Automatic Pagination)
# Iterating the pager walks every page, so list() collects all corpora.
all_corpora = list(rag.list_corpora())
print(f"Found {len(all_corpora)} corpora in total.")
for c in all_corpora:
    print(f"Corpus Name: {c.name} | Display Name: {c.display_name}")

# Approach B: Manual Pagination (for very large projects)
pager = rag.list_corpora(page_size=10)

# Process first page only. Iterating `pager` directly would silently fetch
# every remaining page, so read the first response from `pager.pages`.
first_page = next(pager.pages)
for c in first_page.rag_corpora:
    print(f"Corpus: {c.display_name}")

# Get next page if needed; an empty token means there are no more pages.
if first_page.next_page_token:
    second_page = rag.list_corpora(
        page_size=10, page_token=first_page.next_page_token
    )

# (page residue preserved from the original text: display_name)

import vertexai
from vertexai.preview import rag

# Example: list the files stored in one RAG corpus and guess each file's
# type from the extension of its display name.
vertexai.init(project="{project_id}", location="{region}")

corpus_name = (
    "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}"
)

# List files with automatic pagination
files = list(rag.list_files(corpus_name=corpus_name))
print(f"Found {len(files)} files.")
for f in files:
    # High-level SDK RagFile objects usually have name, display_name,
    # description
    print(f"File: {f.display_name} | Resource: {f.name}")
    # Tip: Check extension to understand file type (PDF, TXT, etc.)
    # Lower-case once; fall back to "" so an unset display name is skipped
    # instead of raising AttributeError.
    lowered_name = (f.display_name or "").lower()
    if lowered_name.endswith(".pdf"):
        print(" Type: PDF")
    elif lowered_name.endswith(".txt"):
        print(" Type: Plain Text")

import vertexai
from vertexai.preview import rag

# Example: fetch a single RAG corpus by its full resource name and print
# its identifying fields.
vertexai.init(project="{project_id}", location="{region}")

# To get details of a specific corpus
corpus_name = (
    "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}"
)
corpus = rag.get_corpus(name=corpus_name)
print(f"Corpus Name: {corpus.name}")
print(f"Display Name: {corpus.display_name}")

import vertexai
from vertexai.preview import rag

# Example: retrieve the top matching contexts for a text query from one
# RAG corpus and print each context's text and source.
vertexai.init(project="{project_id}", location="{region}")

corpus_name = (
    "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}"
)
query = "What is the speed of light?"

# Retrieve contexts
# NOTE(review): `rag_corpora` / `similarity_top_k` look like the legacy
# arguments; newer SDK releases appear to prefer `rag_resources` and
# `rag_retrieval_config` - confirm against the installed version.
response = rag.retrieval_query(
    rag_corpora=[corpus_name],
    text=query,
    similarity_top_k=3,
)
for context in response.contexts.contexts:
    print(f"Context text: {context.text}")
    print(f"Source: {context.source_uri}")

from google import genai
from google.genai import types

# Example: answer a question with a Gemini model whose response is grounded
# in a RAG corpus through a retrieval tool.
# NOTE(review): published google-genai docs select the Vertex backend with
# `vertexai=True`; confirm `enterprise=True` is accepted by the installed SDK.
client = genai.Client(
    enterprise=True,
    project="{project_id}",
    location="{region}",
)

corpus_name = (
    "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}"
)

# Retrieval settings: keep the 3 best chunks and apply a vector similarity
# threshold of 0.5.
retrieval_config = types.RagRetrievalConfig(
    top_k=3,
    filter=types.RagRetrievalConfigFilter(vector_similarity_threshold=0.5),
)

# Store definition tying the corpus to the retrieval settings.
rag_store = types.VertexRagStore(
    rag_resources=[types.VertexRagStoreRagResource(rag_corpus=corpus_name)],
    rag_retrieval_config=retrieval_config,
)

# Define the Agent Platform RAG Engine tool pointing to the corpus
rag_tool = types.Tool(retrieval=types.Retrieval(vertex_rag_store=rag_store))

# Generate content using the RAG Engine tool
generation_config = types.GenerateContentConfig(tools=[rag_tool])
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="What is the speed of light?",
    config=generation_config,
)
print(response.text)