# Together AI — Embeddings & Rerank
Generate text embeddings and rerank documents via Together AI. Embedding models include BGE, GTE, E5, UAE families. Reranking via MixedBread reranker. Use when users need text embeddings, vector search, semantic similarity, document reranking, RAG pipeline components, or retrieval-augmented generation.
npx skill4agent add zainhas/togetherai-skills together-embeddings

Endpoints: `/v1/embeddings` and `/v1/rerank`

from together import Together
client = Together()
response = client.embeddings.create(
model="BAAI/bge-base-en-v1.5",
input="What is the meaning of life?",
)
print(response.data[0].embedding[:5])  # First 5 dimensions

import Together from "together-ai";
const together = new Together();
const response = await together.embeddings.create({
model: "BAAI/bge-base-en-v1.5",
input: "What is the meaning of life?",
});
console.log(response.data[0].embedding.slice(0, 5));

curl -X POST "https://api.together.xyz/v1/embeddings" \
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{"model":"BAAI/bge-base-en-v1.5","input":"What is the meaning of life?"}'

texts = ["First document", "Second document", "Third document"]
response = client.embeddings.create(
model="BAAI/bge-base-en-v1.5",
input=texts,
)
for i, item in enumerate(response.data):
    print(f"Text {i}: {len(item.embedding)} dimensions")

import Together from "together-ai";
const together = new Together();
const response = await together.embeddings.create({
model: "BAAI/bge-base-en-v1.5",
input: [
"First document",
"Second document",
"Third document",
],
});
for (const item of response.data) {
console.log(`Index ${item.index}: ${item.embedding.length} dimensions`);
}

curl -X POST "https://api.together.xyz/v1/embeddings" \
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "BAAI/bge-base-en-v1.5",
"input": [
"First document",
"Second document",
"Third document"
]
}'

| Model | API String | Dimensions | Max Input |
|---|---|---|---|
| BGE Base EN v1.5 | `BAAI/bge-base-en-v1.5` | 768 | 512 tokens |
| Multilingual E5 Large | `intfloat/multilingual-e5-large-instruct` | 1024 | 514 tokens (recommended) |
response = client.rerank.create(
model="mixedbread-ai/Mxbai-Rerank-Large-V2",
query="What is the capital of France?",
documents=[
"Paris is the capital of France.",
"Berlin is the capital of Germany.",
"London is the capital of England.",
"The Eiffel Tower is in Paris.",
],
)
for result in response.results:
    print(f"Index: {result.index}, Score: {result.relevance_score:.4f}")

import Together from "together-ai";
const together = new Together();
const documents = [
"Paris is the capital of France.",
"Berlin is the capital of Germany.",
"London is the capital of England.",
"The Eiffel Tower is in Paris.",
];
const response = await together.rerank.create({
model: "mixedbread-ai/Mxbai-Rerank-Large-V2",
query: "What is the capital of France?",
documents,
top_n: 2,
});
for (const result of response.results) {
console.log(`Index: ${result.index}, Score: ${result.relevance_score}`);
}

curl -X POST "https://api.together.xyz/v1/rerank" \
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "mixedbread-ai/Mxbai-Rerank-Large-V2",
"query": "What is the capital of France?",
"documents": ["Paris is the capital of France.", "Berlin is the capital of Germany."]
}'

| Parameter | Type | Description |
|---|---|---|
| `model` | string | Rerank model (required) |
| `query` | string | Search query (required) |
| `documents` | string[] or object[] | Documents to rerank (required). Pass objects with named fields for structured documents. |
| `top_n` | int | Return top N results |
| `return_documents` | bool | Include document text in response |
| `rank_fields` | string[] | Fields to use for ranking when documents are JSON objects (e.g., `["title", "text"]`) |
# End-to-end "retrieve then rerank" RAG pipeline using the Together AI client.
# NOTE(review): assumes `client` is an initialized Together() instance and
# `vector_db` is a user-supplied vector store exposing search(embedding, top_k)
# — both are defined outside this snippet; confirm against the surrounding docs.

# 1. Generate query embedding
query_embedding = client.embeddings.create(
model="BAAI/bge-base-en-v1.5",
input="How does photosynthesis work?",
).data[0].embedding
# 2. Retrieve candidates from vector DB (your code)
# Over-fetch (top_k=20) so the reranker has a wide candidate pool to score.
candidates = vector_db.search(query_embedding, top_k=20)
# 3. Rerank for precision
reranked = client.rerank.create(
model="mixedbread-ai/Mxbai-Rerank-Large-V2",
query="How does photosynthesis work?",
documents=[c.text for c in candidates],
top_n=5,
)
# 4. Use top results as context for LLM
# Each result.index points back into the `documents` list, so map through
# `candidates` to recover the original document text.
context = "\n".join([candidates[r.index].text for r in reranked.results])
response = client.chat.completions.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
messages=[
{"role": "system", "content": f"Answer based on this context:\n{context}"},
{"role": "user", "content": "How does photosynthesis work?"},
],
)