Commit f16f0ad

Updating the VoyageAI integration (#20073)
* Introducing voyage-3.5
* Increase the package version
* Introducing contextual embedding models
* Supporting voyage-context-3 token counting; adding integration tests
* Lint correction
* Correcting the default batch size
* Bump the minor version
* Correcting the tests
* Correcting/formatting
1 parent df414be commit f16f0ad
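For orientation, a minimal usage sketch of the updated integration. This is hedged, not part of the commit: the model names come straight from the diff below, `voyage_api_key` and `get_text_embedding_batch` are assumed to be the package's existing constructor argument and base-class method, and the key value is a placeholder.

```python
from llama_index.embeddings.voyageai import VoyageEmbedding

# voyage-3.5 and voyage-context-3 are the models introduced by this commit.
embed_model = VoyageEmbedding(
    model_name="voyage-context-3",
    voyage_api_key="pa-...",  # placeholder; usually read from the VOYAGE_API_KEY env var
)

# Batches are now built dynamically from per-model token limits (see base.py below).
vectors = embed_model.get_text_embedding_batch(
    ["VoyageAI contextualized embeddings.", "Each batch respects a token budget."]
)
print(len(vectors), len(vectors[0]))
```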

5 files changed: +707 −57 lines changed

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
```diff
@@ -5,52 +5,67 @@
 ## [2025-10-26]

 ### llama-index-core [0.14.6]
+
 - Add allow_parallel_tool_calls for non-streaming ([#20117](https://github.com/run-llama/llama_index/pull/20117))
 - Fix invalid use of field-specific metadata ([#20122](https://github.com/run-llama/llama_index/pull/20122))
 - update doc for SemanticSplitterNodeParser ([#20125](https://github.com/run-llama/llama_index/pull/20125))
 - fix rare cases when sentence splits are larger than chunk size ([#20147](https://github.com/run-llama/llama_index/pull/20147))

 ### llama-index-embeddings-bedrock [0.7.0]
+
 - Fix BedrockEmbedding to support Cohere v4 response format ([#20094](https://github.com/run-llama/llama_index/pull/20094))

 ### llama-index-embeddings-isaacus [0.1.0]
+
 - feat: Isaacus embeddings integration ([#20124](https://github.com/run-llama/llama_index/pull/20124))

 ### llama-index-embeddings-oci-genai [0.4.2]
+
 - Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146))

 ### llama-index-llms-anthropic [0.9.7]
+
 - Fix double token stream in anthropic llm ([#20108](https://github.com/run-llama/llama_index/pull/20108))
 - Ensure anthropic content delta only has user facing response ([#20113](https://github.com/run-llama/llama_index/pull/20113))

 ### llama-index-llms-baseten [0.1.7]
+
 - add GLM ([#20121](https://github.com/run-llama/llama_index/pull/20121))

 ### llama-index-llms-helicone [0.1.0]
+
 - integrate helicone to llama-index ([#20131](https://github.com/run-llama/llama_index/pull/20131))

 ### llama-index-llms-oci-genai [0.6.4]
+
 - Update OCI GenAI cohere models ([#20146](https://github.com/run-llama/llama_index/pull/20146))

 ### llama-index-llms-openai [0.6.5]
+
 - chore: openai vbump ([#20095](https://github.com/run-llama/llama_index/pull/20095))

 ### llama-index-readers-imdb-review [0.4.2]
+
 - chore: Update selenium dependency in imdb-review reader ([#20105](https://github.com/run-llama/llama_index/pull/20105))

 ### llama-index-retrievers-bedrock [0.5.0]
+
 - feat(bedrock): add async support for AmazonKnowledgeBasesRetriever ([#20114](https://github.com/run-llama/llama_index/pull/20114))

 ### llama-index-retrievers-superlinked [0.1.3]
+
 - Update README.md ([#19829](https://github.com/run-llama/llama_index/pull/19829))

 ### llama-index-storage-kvstore-postgres [0.4.2]
+
 - fix: Replace raw SQL string interpolation with proper SQLAlchemy parameterized APIs in PostgresKVStore ([#20104](https://github.com/run-llama/llama_index/pull/20104))

 ### llama-index-tools-mcp [0.4.3]
+
 - Fix BasicMCPClient resource signatures ([#20118](https://github.com/run-llama/llama_index/pull/20118))

 ### llama-index-vector-stores-postgres [0.7.1]
+
 - Add GIN index support for text array metadata in PostgreSQL vector store ([#20130](https://github.com/run-llama/llama_index/pull/20130))

 ## [2025-10-15]
```

llama-index-integrations/embeddings/llama-index-embeddings-voyageai/llama_index/embeddings/voyageai/base.py

Lines changed: 116 additions & 51 deletions
```diff
@@ -4,7 +4,7 @@
 import os
 from io import BytesIO
 from pathlib import Path
-from typing import Any, List, Optional, Union
+from typing import Any, Generator, List, Optional, Tuple, Union

 import voyageai
 from PIL import Image
@@ -17,14 +17,31 @@

 logger = logging.getLogger(__name__)

-DEFAULT_VOYAGE_2_BATCH_SIZE = 72
-DEFAULT_VOYAGE_3_LITE_BATCH_SIZE = 30
-DEFAULT_VOYAGE_3_BATCH_SIZE = 10
-DEFAULT_BATCH_SIZE = 7
+MAX_BATCH_SIZE = 1000
+
 MULTIMODAL_MODELS = ["voyage-multimodal-3"]
+CONTEXT_MODELS = ["voyage-context-3"]

 SUPPORTED_IMAGE_FORMATS = {"png", "jpeg", "jpg", "webp", "gif"}

+VOYAGE_TOTAL_TOKEN_LIMITS = {
+    "voyage-context-3": 32_000,
+    "voyage-3.5-lite": 1_000_000,
+    "voyage-3.5": 32_000,  # voyage-3.5 has 32k context window
+    "voyage-2": 320_000,
+    "voyage-3-large": 120_000,
+    "voyage-code-3": 120_000,
+    "voyage-large-2-instruct": 120_000,
+    "voyage-finance-2": 120_000,
+    "voyage-multilingual-2": 120_000,
+    "voyage-law-2": 120_000,
+    "voyage-large-2": 120_000,
+    "voyage-3": 120_000,
+    "voyage-3-lite": 120_000,
+    "voyage-code-2": 120_000,
+    "voyage-3-m-exp": 120_000,
+}
+

 class VoyageEmbedding(MultiModalEmbedding):
     """
@@ -76,19 +93,7 @@ def __init__(
         )

         if embed_batch_size is None:
-            embed_batch_size = (
-                DEFAULT_VOYAGE_2_BATCH_SIZE
-                if model_name in ["voyage-2", "voyage-02"]
-                else (
-                    DEFAULT_VOYAGE_3_LITE_BATCH_SIZE
-                    if model_name in ["voyage-3-lite", "voyage-3.5-lite"]
-                    else (
-                        DEFAULT_VOYAGE_3_BATCH_SIZE
-                        if model_name in ["voyage-3", "voyage-3.5", "voyage-context-3"]
-                        else DEFAULT_BATCH_SIZE
-                    )
-                )
-            )
+            embed_batch_size = MAX_BATCH_SIZE

         super().__init__(
             model_name=model_name,
@@ -116,6 +121,32 @@ def _validate_image_format(file_type: str) -> bool:
     def _texts_to_content(cls, input_strs: List[str]) -> List[dict]:
         return [{"content": [{"type": "text", "text": x}]} for x in input_strs]

+    def _build_batches(
+        self, texts: List[str]
+    ) -> Generator[Tuple[List[str], int], None, None]:
+        """Generate batches of texts based on token limits."""
+        max_tokens_per_batch = VOYAGE_TOTAL_TOKEN_LIMITS.get(self.model_name, 120_000)
+        index = 0
+
+        while index < len(texts):
+            batch: List[str] = []
+            batch_tokens = 0
+            while (
+                index < len(texts)
+                and len(batch) < min(self.embed_batch_size, MAX_BATCH_SIZE)
+                and batch_tokens < max_tokens_per_batch
+            ):
+                n_tokens = len(
+                    self._client.tokenize([texts[index]], model=self.model_name)[0]
+                )
+                if batch_tokens + n_tokens > max_tokens_per_batch and len(batch) > 0:
+                    break
+                batch_tokens += n_tokens
+                batch.append(texts[index])
+                index += 1
+
+            yield batch, len(batch)
+
     def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image:
         """Convert an image to a base64 Data URL."""
         if isinstance(image_input, (str, Path)):
@@ -177,41 +208,75 @@ async def _aget_image_embedding(self, img_file_path: ImageType) -> Embedding:
         return await self._aembed_image(img_file_path)

     def _embed(self, texts: List[str], input_type: str) -> List[List[float]]:
-        if self.model_name in MULTIMODAL_MODELS:
-            return self._client.multimodal_embed(
-                inputs=self._texts_to_content(texts),
-                model=self.model_name,
-                input_type=input_type,
-                truncation=self.truncation,
-            ).embeddings
-        else:
-            return self._client.embed(
-                texts,
-                model=self.model_name,
-                input_type=input_type,
-                truncation=self.truncation,
-                output_dtype=self.output_dtype,
-                output_dimension=self.output_dimension,
-            ).embeddings
+        """Embed texts with dynamic batching based on token limits."""
+        embeddings: List[List[float]] = []
+
+        for batch, _ in self._build_batches(texts):
+            if self.model_name in CONTEXT_MODELS:
+                r = self._client.contextualized_embed(
+                    inputs=[batch],
+                    model=self.model_name,
+                    input_type=input_type,
+                    output_dtype=self.output_dtype,
+                    output_dimension=self.output_dimension,
+                ).results
+                embeddings.extend(r[0].embeddings)
+            elif self.model_name in MULTIMODAL_MODELS:
+                batch_embeddings = self._client.multimodal_embed(
+                    inputs=self._texts_to_content(batch),
+                    model=self.model_name,
+                    input_type=input_type,
+                    truncation=self.truncation,
+                ).embeddings
+                embeddings.extend(batch_embeddings)
+            else:
+                batch_embeddings = self._client.embed(
+                    batch,
+                    model=self.model_name,
+                    input_type=input_type,
+                    truncation=self.truncation,
+                    output_dtype=self.output_dtype,
+                    output_dimension=self.output_dimension,
+                ).embeddings
+                embeddings.extend(batch_embeddings)
+
+        return embeddings

     async def _aembed(self, texts: List[str], input_type: str) -> List[List[float]]:
-        if self.model_name in MULTIMODAL_MODELS:
-            r = await self._aclient.multimodal_embed(
-                inputs=self._texts_to_content(texts),
-                model=self.model_name,
-                input_type=input_type,
-                truncation=self.truncation,
-            )
-        else:
-            r = await self._aclient.embed(
-                texts,
-                model=self.model_name,
-                input_type=input_type,
-                truncation=self.truncation,
-                output_dtype=self.output_dtype,
-                output_dimension=self.output_dimension,
-            )
-        return r.embeddings
+        """Asynchronously embed texts with dynamic batching based on token limits."""
+        embeddings: List[List[float]] = []
+
+        for batch, _ in self._build_batches(texts):
+            if self.model_name in CONTEXT_MODELS:
+                ar = await self._aclient.contextualized_embed(
+                    inputs=[batch],
+                    model=self.model_name,
+                    input_type=input_type,
+                    output_dtype=self.output_dtype,
+                    output_dimension=self.output_dimension,
+                )
+                r = ar.results
+                embeddings.extend(r[0].embeddings)
+            elif self.model_name in MULTIMODAL_MODELS:
+                r = await self._aclient.multimodal_embed(
+                    inputs=self._texts_to_content(batch),
+                    model=self.model_name,
+                    input_type=input_type,
+                    truncation=self.truncation,
+                )
+                embeddings.extend(r.embeddings)
+            else:
+                r = await self._aclient.embed(
+                    batch,
+                    model=self.model_name,
+                    input_type=input_type,
+                    truncation=self.truncation,
+                    output_dtype=self.output_dtype,
+                    output_dimension=self.output_dimension,
+                )
+                embeddings.extend(r.embeddings)
+
+        return embeddings

     def _get_query_embedding(self, query: str) -> List[float]:
         """Get query embedding."""
```

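One subtlety in the `voyage-context-3` branch above: `contextualized_embed` takes a list of documents, where each document is a list of chunks, and returns one result per document. Passing `inputs=[batch]` treats the whole batch as a single document, so each text is embedded with the others as context, and `results[0].embeddings` then yields one vector per text. A small sketch of that call shape against the voyageai client (the key is a placeholder):

```python
import voyageai

vo = voyageai.Client(api_key="pa-...")  # placeholder key

chunks = ["Chunk one of the document.", "Chunk two, embedded in context."]
result = vo.contextualized_embed(inputs=[chunks], model="voyage-context-3")

# One result per input document; one embedding per chunk within it.
doc_result = result.results[0]
print(len(doc_result.embeddings))  # -> 2
```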
llama-index-integrations/embeddings/llama-index-embeddings-voyageai/pyproject.toml

Lines changed: 2 additions & 2 deletions
```diff
@@ -26,14 +26,14 @@ dev = [

 [project]
 name = "llama-index-embeddings-voyageai"
-version = "0.4.2"
+version = "0.5.0"
 description = "llama-index embeddings voyageai integration"
 authors = [{name = "Your Name", email = "[email protected]"}]
 requires-python = ">=3.9,<4.0"
 readme = "README.md"
 license = "MIT"
 dependencies = [
-    "voyageai>=0.3.2,<0.4.0 ; python_version >= '3.9' and python_version < '3.13'",
+    "voyageai>=0.3.5,<0.4.0 ; python_version >= '3.9' and python_version < '3.13'",
     "llama-index-core>=0.13.0,<0.15",
 ]
```
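Note: the voyageai floor moves from 0.3.2 to 0.3.5, presumably so the `contextualized_embed` call that base.py now makes is guaranteed to exist in the installed client.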
