🎱 feat: Use Shared Thread Pool across Operations (#166)

sbruel · web-flow · commit 2b314735e065 · 2025-07-04T10:20:33.000-04:00
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,5 @@ uploads/
 myenv/
 venv/
 *.pyc
+dev.yml
+SHOPIFY.md
diff --git a/app/routes/document_routes.py b/app/routes/document_routes.py
@@ -38,10 +38,10 @@
 
 
 @router.get("/ids")
-async def get_all_ids():
+async def get_all_ids(request: Request):
     try:
         if isinstance(vector_store, AsyncPgVector):
-            ids = await vector_store.get_all_ids()
+            ids = await vector_store.get_all_ids(executor=request.app.state.thread_pool)
         else:
             ids = vector_store.get_all_ids()
 
@@ -80,11 +80,11 @@ async def health_check():
 
 
 @router.get("/documents", response_model=list[DocumentResponse])
-async def get_documents_by_ids(ids: list[str] = Query(...)):
+async def get_documents_by_ids(request: Request, ids: list[str] = Query(...)):
     try:
         if isinstance(vector_store, AsyncPgVector):
-            existing_ids = await vector_store.get_filtered_ids(ids)
-            documents = await vector_store.get_documents_by_ids(ids)
+            existing_ids = await vector_store.get_filtered_ids(ids, executor=request.app.state.thread_pool)
+            documents = await vector_store.get_documents_by_ids(ids, executor=request.app.state.thread_pool)
         else:
             existing_ids = vector_store.get_filtered_ids(ids)
             documents = vector_store.get_documents_by_ids(ids)
@@ -118,11 +118,11 @@ async def get_documents_by_ids(ids: list[str] = Query(...)):
 
 
 @router.delete("/documents")
-async def delete_documents(document_ids: List[str] = Body(...)):
+async def delete_documents(request: Request, document_ids: List[str] = Body(...)):
     try:
         if isinstance(vector_store, AsyncPgVector):
-            existing_ids = await vector_store.get_filtered_ids(document_ids)
-            await vector_store.delete(ids=document_ids)
+            existing_ids = await vector_store.get_filtered_ids(document_ids, executor=request.app.state.thread_pool)
+            await vector_store.delete(ids=document_ids, executor=request.app.state.thread_pool)
         else:
             existing_ids = vector_store.get_filtered_ids(document_ids)
             vector_store.delete(ids=document_ids)
@@ -175,12 +175,11 @@ async def query_embeddings_by_file_id(
         embedding = get_cached_query_embedding(body.query)
 
         if isinstance(vector_store, AsyncPgVector):
-            documents = await run_in_executor(
-                None,
-                vector_store.similarity_search_with_score_by_vector,
+            documents = await vector_store.asimilarity_search_with_score_by_vector(
                 embedding,
                 k=body.k,
                 filter={"file_id": body.file_id},
+                executor=request.app.state.thread_pool
             )
         else:
             documents = vector_store.similarity_search_with_score_by_vector(
@@ -246,6 +245,7 @@ async def store_data_in_vector_db(
     file_id: str,
     user_id: str = "",
     clean_content: bool = False,
+    executor = None,
 ) -> bool:
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
@@ -274,7 +274,7 @@ async def store_data_in_vector_db(
     try:
         if isinstance(vector_store, AsyncPgVector):
             ids = await vector_store.aadd_documents(
-                docs, ids=[file_id] * len(documents)
+                docs, ids=[file_id] * len(documents), executor=executor
             )
         else:
             ids = vector_store.add_documents(docs, ids=[file_id] * len(documents))
@@ -312,9 +312,9 @@ async def embed_local_file(
         loader, known_type, file_ext = get_loader(
             document.filename, document.file_content_type, document.filepath
         )
-        data = await run_in_executor(None, loader.load)
+        data = await run_in_executor(request.app.state.thread_pool, loader.load)
         result = await store_data_in_vector_db(
-            data, document.file_id, user_id, clean_content=file_ext == "pdf"
+            data, document.file_id, user_id, clean_content=file_ext == "pdf", executor=request.app.state.thread_pool
         )
 
         if result:
@@ -390,9 +390,9 @@ async def embed_file(
         loader, known_type, file_ext = get_loader(
             file.filename, file.content_type, temp_file_path
         )
-        data = await run_in_executor(None, loader.load)
+        data = await run_in_executor(request.app.state.thread_pool, loader.load)
         result = await store_data_in_vector_db(
-            data=data, file_id=file_id, user_id=user_id, clean_content=file_ext == "pdf"
+            data=data, file_id=file_id, user_id=user_id, clean_content=file_ext == "pdf", executor=request.app.state.thread_pool
         )
 
         if not result:
@@ -454,12 +454,12 @@ async def embed_file(
 
 
 @router.get("/documents/{id}/context")
-async def load_document_context(id: str):
+async def load_document_context(request: Request, id: str):
     ids = [id]
     try:
         if isinstance(vector_store, AsyncPgVector):
-            existing_ids = await vector_store.get_filtered_ids(ids)
-            documents = await vector_store.get_documents_by_ids(ids)
+            existing_ids = await vector_store.get_filtered_ids(ids, executor=request.app.state.thread_pool)
+            documents = await vector_store.get_documents_by_ids(ids, executor=request.app.state.thread_pool)
         else:
             existing_ids = vector_store.get_filtered_ids(ids)
             documents = vector_store.get_documents_by_ids(ids)
@@ -525,9 +525,9 @@ async def embed_file_upload(
             uploaded_file.filename, uploaded_file.content_type, temp_file_path
         )
 
-        data = await run_in_executor(None, loader.load)
+        data = await run_in_executor(request.app.state.thread_pool, loader.load)
         result = await store_data_in_vector_db(
-            data, file_id, user_id, clean_content=file_ext == "pdf"
+            data, file_id, user_id, clean_content=file_ext == "pdf", executor=request.app.state.thread_pool
         )
 
         if not result:
@@ -566,19 +566,18 @@ async def embed_file_upload(
 
 
 @router.post("/query_multiple")
-async def query_embeddings_by_file_ids(body: QueryMultipleBody):
+async def query_embeddings_by_file_ids(request: Request, body: QueryMultipleBody):
     try:
         # Get the embedding of the query text
         embedding = get_cached_query_embedding(body.query)
 
         # Perform similarity search with the query embedding and filter by the file_ids in metadata
         if isinstance(vector_store, AsyncPgVector):
-            documents = await run_in_executor(
-                None,
-                vector_store.similarity_search_with_score_by_vector,
+            documents = await vector_store.asimilarity_search_with_score_by_vector(
                 embedding,
                 k=body.k,
                 filter={"file_id": {"$in": body.file_ids}},
+                executor=request.app.state.thread_pool
             )
         else:
             documents = vector_store.similarity_search_with_score_by_vector(
diff --git a/app/services/vector_store/async_pg_vector.py b/app/services/vector_store/async_pg_vector.py
@@ -1,19 +1,75 @@
-from typing import Optional
+from typing import Optional, List, Tuple, Dict, Any
+import asyncio
 from langchain_core.documents import Document
 from langchain_core.runnables.config import run_in_executor
 from .extended_pg_vector import ExtendedPgVector
 
 class AsyncPgVector(ExtendedPgVector):
-    async def get_all_ids(self) -> list[str]:
-        return await run_in_executor(None, super().get_all_ids)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._thread_pool = None
     
-    async def get_filtered_ids(self, ids: list[str]) -> list[str]:
-        return await run_in_executor(None, super().get_filtered_ids, ids)
+    def _get_thread_pool(self):
+        """Return the fallback executor used when the caller passes none.
+
+        Caches the running loop's private ``_default_executor`` (may be None).
+        """
+        if self._thread_pool is None:
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:  # no running event loop in this thread
+                return None
+            self._thread_pool = getattr(loop, '_default_executor', None)
+        return self._thread_pool
+    
+    async def get_all_ids(self, executor=None) -> list[str]:
+        executor = executor or self._get_thread_pool()
+        return await run_in_executor(executor, super().get_all_ids)
+    
+    async def get_filtered_ids(self, ids: list[str], executor=None) -> list[str]:
+        executor = executor or self._get_thread_pool()
+        return await run_in_executor(executor, super().get_filtered_ids, ids)
 
-    async def get_documents_by_ids(self, ids: list[str]) -> list[Document]:
-        return await run_in_executor(None, super().get_documents_by_ids, ids)
+    async def get_documents_by_ids(self, ids: list[str], executor=None) -> list[Document]:
+        executor = executor or self._get_thread_pool()
+        return await run_in_executor(executor, super().get_documents_by_ids, ids)
 
     async def delete(
-        self, ids: Optional[list[str]] = None, collection_only: bool = False
+        self, ids: Optional[list[str]] = None, collection_only: bool = False, executor=None
     ) -> None:
-        await run_in_executor(None, self._delete_multiple, ids, collection_only)
+        executor = executor or self._get_thread_pool()
+        await run_in_executor(executor, self._delete_multiple, ids, collection_only)
+    
+    async def asimilarity_search_with_score_by_vector(
+        self, 
+        embedding: List[float], 
+        k: int = 4, 
+        filter: Optional[Dict[str, Any]] = None,
+        executor=None
+    ) -> List[Tuple[Document, float]]:
+        """Async version of similarity_search_with_score_by_vector"""
+        executor = executor or self._get_thread_pool()
+        return await run_in_executor(
+            executor, 
+            super().similarity_search_with_score_by_vector, 
+            embedding, 
+            k, 
+            filter
+        )
+    
+    async def aadd_documents(
+        self, 
+        documents: List[Document], 
+        ids: Optional[List[str]] = None,
+        executor=None,
+        **kwargs
+    ) -> List[str]:
+        """Async version of add_documents"""
+        executor = executor or self._get_thread_pool()
+        return await run_in_executor(
+            executor, 
+            super().add_documents, 
+            documents, 
+            ids=ids,
+            **kwargs
+        )
diff --git a/main.py b/main.py
@@ -1,9 +1,11 @@
 # main.py
+import os
 import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
+from concurrent.futures import ThreadPoolExecutor
 
 from starlette.responses import JSONResponse
 
@@ -16,11 +18,21 @@
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup logic goes here
+    # Create a bounded thread pool: RAG_THREAD_POOL_SIZE (default: CPU count), clamped to 1..8
+    max_workers = max(1, min(int(os.getenv("RAG_THREAD_POOL_SIZE", str(os.cpu_count() or 1))), 8))
+    app.state.thread_pool = ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="rag-worker")
+    logger.info(f"Initialized thread pool with {max_workers} workers (CPU cores: {os.cpu_count()})")
+    
     if VECTOR_DB_TYPE == VectorDBType.PGVECTOR:
         await PSQLDatabase.get_pool()  # Initialize the pool
         await ensure_custom_id_index_on_embedding()
 
     yield
+    
+    # Cleanup logic
+    logger.info("Shutting down thread pool")
+    app.state.thread_pool.shutdown(wait=True)
+    logger.info("Thread pool shutdown complete")
 
 app = FastAPI(lifespan=lifespan, debug=debug_mode)
 
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -4,6 +4,7 @@
 import pytest
 from fastapi.testclient import TestClient
 from langchain_core.documents import Document
+from concurrent.futures import ThreadPoolExecutor
 
 from main import app
 
@@ -24,19 +25,23 @@ def auth_headers():
 def override_vector_store(monkeypatch):
     from app.config import vector_store
 
+    # Initialize thread pool for tests since TestClient doesn't run lifespan
+    if not hasattr(app.state, 'thread_pool') or app.state.thread_pool is None:
+        app.state.thread_pool = ThreadPoolExecutor(max_workers=2, thread_name_prefix="test-worker")
+
     # Override get_all_ids as an async function.
-    async def dummy_get_all_ids():
+    async def dummy_get_all_ids(executor=None):
         return ["testid1", "testid2"]
     monkeypatch.setattr(vector_store, "get_all_ids", dummy_get_all_ids)
 
     # Override get_filtered_ids as an async function.
-    async def dummy_get_filtered_ids(ids):
+    async def dummy_get_filtered_ids(ids, executor=None):
         dummy_ids = ["testid1", "testid2"]
         return [id for id in dummy_ids if id in ids]
     monkeypatch.setattr(vector_store, "get_filtered_ids", dummy_get_filtered_ids)
 
     # Override get_documents_by_ids as an async function.
-    async def dummy_get_documents_by_ids(ids):
+    async def dummy_get_documents_by_ids(ids, executor=None):
         return [
             Document(page_content="Test content", metadata={"file_id": id})
             for id in ids
@@ -56,22 +61,35 @@ def dummy_similarity_search_with_score_by_vector(embedding, k, filter):
             metadata={"file_id": filter.get("file_id", "testid1"), "user_id": "testuser"},
         )
         return [(doc, 0.9)]
+    
+    async def dummy_asimilarity_search_with_score_by_vector(embedding, k, filter=None, executor=None):
+        doc = Document(
+            page_content="Queried content",
+            metadata={"file_id": filter.get("file_id", "testid1") if filter else "testid1", "user_id": "testuser"},
+        )
+        return [(doc, 0.9)]
+    
     monkeypatch.setattr(
         vector_store,
         "similarity_search_with_score_by_vector",
         dummy_similarity_search_with_score_by_vector,
     )
+    monkeypatch.setattr(
+        vector_store,
+        "asimilarity_search_with_score_by_vector",
+        dummy_asimilarity_search_with_score_by_vector,
+    )
 
     # Override document addition functions.
     def dummy_add_documents(docs, ids):
         return ids
-    async def dummy_aadd_documents(docs, ids):
+    async def dummy_aadd_documents(docs, ids=None, executor=None):
         return ids
     monkeypatch.setattr(vector_store, "add_documents", dummy_add_documents)
     monkeypatch.setattr(vector_store, "aadd_documents", dummy_aadd_documents)
 
     # Override delete function.
-    async def dummy_delete(ids=None, collection_only=False):
+    async def dummy_delete(ids=None, collection_only=False, executor=None):
         return None
     monkeypatch.setattr(vector_store, "delete", dummy_delete)
 

-Original file line number
+Diff line change
 myenv/
 venv/
 *.pyc
 +dev.yml
 +SHOPIFY.md