|
4 | 4 | import os |
5 | 5 | from io import BytesIO |
6 | 6 | from pathlib import Path |
7 | | -from typing import Any, List, Optional, Union |
| 7 | +from typing import Any, Generator, List, Optional, Tuple, Union |
8 | 8 |
|
9 | 9 | import voyageai |
10 | 10 | from PIL import Image |
|
17 | 17 |
|
18 | 18 | logger = logging.getLogger(__name__) |
19 | 19 |
|
20 | | -DEFAULT_VOYAGE_2_BATCH_SIZE = 72 |
21 | | -DEFAULT_VOYAGE_3_LITE_BATCH_SIZE = 30 |
22 | | -DEFAULT_VOYAGE_3_BATCH_SIZE = 10 |
23 | | -DEFAULT_BATCH_SIZE = 7 |
| 20 | +MAX_BATCH_SIZE = 1000 |
| 21 | + |
24 | 22 | MULTIMODAL_MODELS = ["voyage-multimodal-3"] |
| 23 | +CONTEXT_MODELS = ["voyage-context-3"] |
25 | 24 |
|
26 | 25 | SUPPORTED_IMAGE_FORMATS = {"png", "jpeg", "jpg", "webp", "gif"} |
27 | 26 |
|
| 27 | +VOYAGE_TOTAL_TOKEN_LIMITS = { |
| 28 | + "voyage-context-3": 32_000, |
| 29 | + "voyage-3.5-lite": 1_000_000, |
| 30 | +    "voyage-3.5": 32_000,  # total token budget capped at the model's 32k context window |
| 31 | + "voyage-2": 320_000, |
| 32 | + "voyage-3-large": 120_000, |
| 33 | + "voyage-code-3": 120_000, |
| 34 | + "voyage-large-2-instruct": 120_000, |
| 35 | + "voyage-finance-2": 120_000, |
| 36 | + "voyage-multilingual-2": 120_000, |
| 37 | + "voyage-law-2": 120_000, |
| 38 | + "voyage-large-2": 120_000, |
| 39 | + "voyage-3": 120_000, |
| 40 | + "voyage-3-lite": 120_000, |
| 41 | + "voyage-code-2": 120_000, |
| 42 | + "voyage-3-m-exp": 120_000, |
| 43 | +} |
| 44 | + |
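A quick sketch of the fallback behavior for models missing from this table; `_build_batches` below uses the same `.get(..., 120_000)` default. The second model name is hypothetical:

```python
# Known model: returns its entry from the table.
VOYAGE_TOTAL_TOKEN_LIMITS.get("voyage-3.5-lite", 120_000)    # 1_000_000

# Unlisted/future model (hypothetical name): conservative 120k default.
VOYAGE_TOTAL_TOKEN_LIMITS.get("some-future-model", 120_000)  # 120_000
```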
28 | 45 |
|
29 | 46 | class VoyageEmbedding(MultiModalEmbedding): |
30 | 47 | """ |
@@ -76,19 +93,7 @@ def __init__( |
76 | 93 | ) |
77 | 94 |
|
78 | 95 | if embed_batch_size is None: |
79 | | - embed_batch_size = ( |
80 | | - DEFAULT_VOYAGE_2_BATCH_SIZE |
81 | | - if model_name in ["voyage-2", "voyage-02"] |
82 | | - else ( |
83 | | - DEFAULT_VOYAGE_3_LITE_BATCH_SIZE |
84 | | - if model_name in ["voyage-3-lite", "voyage-3.5-lite"] |
85 | | - else ( |
86 | | - DEFAULT_VOYAGE_3_BATCH_SIZE |
87 | | - if model_name in ["voyage-3", "voyage-3.5", "voyage-context-3"] |
88 | | - else DEFAULT_BATCH_SIZE |
89 | | - ) |
90 | | - ) |
91 | | - ) |
| 96 | + embed_batch_size = MAX_BATCH_SIZE |
92 | 97 |
|
93 | 98 | super().__init__( |
94 | 99 | model_name=model_name, |
@@ -116,6 +121,32 @@ def _validate_image_format(file_type: str) -> bool: |
116 | 121 | def _texts_to_content(cls, input_strs: List[str]) -> List[dict]: |
117 | 122 | return [{"content": [{"type": "text", "text": x}]} for x in input_strs] |
118 | 123 |
|
| 124 | + def _build_batches( |
| 125 | + self, texts: List[str] |
| 126 | + ) -> Generator[Tuple[List[str], int], None, None]: |
| 127 | + """Generate batches of texts based on token limits.""" |
| 128 | + max_tokens_per_batch = VOYAGE_TOTAL_TOKEN_LIMITS.get(self.model_name, 120_000) |
| 129 | + index = 0 |
| 130 | + |
| 131 | + while index < len(texts): |
| 132 | + batch: List[str] = [] |
| 133 | + batch_tokens = 0 |
| 134 | + while ( |
| 135 | + index < len(texts) |
| 136 | + and len(batch) < min(self.embed_batch_size, MAX_BATCH_SIZE) |
| 137 | + and batch_tokens < max_tokens_per_batch |
| 138 | + ): |
| 139 | + n_tokens = len( |
| 140 | + self._client.tokenize([texts[index]], model=self.model_name)[0] |
| 141 | + ) |
| 142 | + if batch_tokens + n_tokens > max_tokens_per_batch and len(batch) > 0: |
| 143 | + break |
| 144 | + batch_tokens += n_tokens |
| 145 | + batch.append(texts[index]) |
| 146 | + index += 1 |
| 147 | + |
| 148 | + yield batch, len(batch) |
| 149 | + |
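A minimal usage sketch of the generator above, assuming an already-constructed `VoyageEmbedding` instance named `embed_model` (the method is private, so this is illustration only; each text is measured with the client's `tokenize` call, as in the code above):

```python
# Illustration: iterate token-bounded batches of input texts.
texts = ["first document", "second document", "third document"]
for batch, batch_len in embed_model._build_batches(texts):
    # Each batch respects both embed_batch_size and the model's
    # total-token budget; batch_len is simply len(batch).
    print(f"embedding {batch_len} texts in one API call")
```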
119 | 150 | def _image_to_content(self, image_input: Union[str, Path, BytesIO]) -> Image: |
120 | 151 | """Convert an image to a base64 Data URL.""" |
121 | 152 | if isinstance(image_input, (str, Path)): |
@@ -177,41 +208,75 @@ async def _aget_image_embedding(self, img_file_path: ImageType) -> Embedding: |
177 | 208 | return await self._aembed_image(img_file_path) |
178 | 209 |
|
179 | 210 | def _embed(self, texts: List[str], input_type: str) -> List[List[float]]: |
180 | | - if self.model_name in MULTIMODAL_MODELS: |
181 | | - return self._client.multimodal_embed( |
182 | | - inputs=self._texts_to_content(texts), |
183 | | - model=self.model_name, |
184 | | - input_type=input_type, |
185 | | - truncation=self.truncation, |
186 | | - ).embeddings |
187 | | - else: |
188 | | - return self._client.embed( |
189 | | - texts, |
190 | | - model=self.model_name, |
191 | | - input_type=input_type, |
192 | | - truncation=self.truncation, |
193 | | - output_dtype=self.output_dtype, |
194 | | - output_dimension=self.output_dimension, |
195 | | - ).embeddings |
| 211 | + """Embed texts with dynamic batching based on token limits.""" |
| 212 | + embeddings: List[List[float]] = [] |
| 213 | + |
| 214 | + for batch, _ in self._build_batches(texts): |
| 215 | + if self.model_name in CONTEXT_MODELS: |
| 216 | + r = self._client.contextualized_embed( |
| 217 | + inputs=[batch], |
| 218 | + model=self.model_name, |
| 219 | + input_type=input_type, |
| 220 | + output_dtype=self.output_dtype, |
| 221 | + output_dimension=self.output_dimension, |
| 222 | + ).results |
| 223 | + embeddings.extend(r[0].embeddings) |
| 224 | + elif self.model_name in MULTIMODAL_MODELS: |
| 225 | + batch_embeddings = self._client.multimodal_embed( |
| 226 | + inputs=self._texts_to_content(batch), |
| 227 | + model=self.model_name, |
| 228 | + input_type=input_type, |
| 229 | + truncation=self.truncation, |
| 230 | + ).embeddings |
| 231 | + embeddings.extend(batch_embeddings) |
| 232 | + else: |
| 233 | + batch_embeddings = self._client.embed( |
| 234 | + batch, |
| 235 | + model=self.model_name, |
| 236 | + input_type=input_type, |
| 237 | + truncation=self.truncation, |
| 238 | + output_dtype=self.output_dtype, |
| 239 | + output_dimension=self.output_dimension, |
| 240 | + ).embeddings |
| 241 | + embeddings.extend(batch_embeddings) |
| 242 | + |
| 243 | + return embeddings |
196 | 244 |
|
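Note the contextualized branch wraps the batch as `inputs=[batch]` and reads `results[0].embeddings`: the client treats each inner list as one document's chunks and returns one result per document. A shape sketch of what the code above relies on, assuming `client` is a `voyageai.Client` (chunk texts are illustrative):

```python
# Shape sketch for the contextualized path.
r = client.contextualized_embed(
    inputs=[["chunk a", "chunk b"]],  # one document -> its list of chunks
    model="voyage-context-3",
)
r.results[0].embeddings  # one embedding per chunk: [[...], [...]]
```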
197 | 245 | async def _aembed(self, texts: List[str], input_type: str) -> List[List[float]]: |
198 | | - if self.model_name in MULTIMODAL_MODELS: |
199 | | - r = await self._aclient.multimodal_embed( |
200 | | - inputs=self._texts_to_content(texts), |
201 | | - model=self.model_name, |
202 | | - input_type=input_type, |
203 | | - truncation=self.truncation, |
204 | | - ) |
205 | | - else: |
206 | | - r = await self._aclient.embed( |
207 | | - texts, |
208 | | - model=self.model_name, |
209 | | - input_type=input_type, |
210 | | - truncation=self.truncation, |
211 | | - output_dtype=self.output_dtype, |
212 | | - output_dimension=self.output_dimension, |
213 | | - ) |
214 | | - return r.embeddings |
| 246 | + """Asynchronously embed texts with dynamic batching based on token limits.""" |
| 247 | + embeddings: List[List[float]] = [] |
| 248 | + |
| 249 | + for batch, _ in self._build_batches(texts): |
| 250 | + if self.model_name in CONTEXT_MODELS: |
| 251 | + ar = await self._aclient.contextualized_embed( |
| 252 | + inputs=[batch], |
| 253 | + model=self.model_name, |
| 254 | + input_type=input_type, |
| 255 | + output_dtype=self.output_dtype, |
| 256 | + output_dimension=self.output_dimension, |
| 257 | + ) |
| 258 | + r = ar.results |
| 259 | + embeddings.extend(r[0].embeddings) |
| 260 | + elif self.model_name in MULTIMODAL_MODELS: |
| 261 | + r = await self._aclient.multimodal_embed( |
| 262 | + inputs=self._texts_to_content(batch), |
| 263 | + model=self.model_name, |
| 264 | + input_type=input_type, |
| 265 | + truncation=self.truncation, |
| 266 | + ) |
| 267 | + embeddings.extend(r.embeddings) |
| 268 | + else: |
| 269 | + r = await self._aclient.embed( |
| 270 | + batch, |
| 271 | + model=self.model_name, |
| 272 | + input_type=input_type, |
| 273 | + truncation=self.truncation, |
| 274 | + output_dtype=self.output_dtype, |
| 275 | + output_dimension=self.output_dimension, |
| 276 | + ) |
| 277 | + embeddings.extend(r.embeddings) |
| 278 | + |
| 279 | + return embeddings |
215 | 280 |
|
216 | 281 | def _get_query_embedding(self, query: str) -> List[float]: |
217 | 282 | """Get query embedding.""" |
|
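End to end, the batching stays transparent to callers. A minimal usage sketch, assuming the package is installed and `VOYAGE_API_KEY` is set in the environment (the model name here is an assumption):

```python
from llama_index.embeddings.voyageai import VoyageEmbedding

# Token-budget batching happens inside _embed/_aembed; callers just
# request embeddings as usual.
embed_model = VoyageEmbedding(model_name="voyage-3.5")
vectors = embed_model.get_text_embedding_batch(["hello", "world"])
assert len(vectors) == 2
```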