|
38 | 38 |
|
39 | 39 |
|
40 | 40 | @router.get("/ids")
|
41 |
| -async def get_all_ids(): |
| 41 | +async def get_all_ids(request: Request): |
42 | 42 | try:
|
43 | 43 | if isinstance(vector_store, AsyncPgVector):
|
44 |
| - ids = await vector_store.get_all_ids() |
| 44 | + ids = await vector_store.get_all_ids(executor=request.app.state.thread_pool) |
45 | 45 | else:
|
46 | 46 | ids = vector_store.get_all_ids()
|
47 | 47 |
|
@@ -80,11 +80,11 @@ async def health_check():
|
80 | 80 |
|
81 | 81 |
|
82 | 82 | @router.get("/documents", response_model=list[DocumentResponse])
|
83 |
| -async def get_documents_by_ids(ids: list[str] = Query(...)): |
| 83 | +async def get_documents_by_ids(request: Request, ids: list[str] = Query(...)): |
84 | 84 | try:
|
85 | 85 | if isinstance(vector_store, AsyncPgVector):
|
86 |
| - existing_ids = await vector_store.get_filtered_ids(ids) |
87 |
| - documents = await vector_store.get_documents_by_ids(ids) |
| 86 | + existing_ids = await vector_store.get_filtered_ids(ids, executor=request.app.state.thread_pool) |
| 87 | + documents = await vector_store.get_documents_by_ids(ids, executor=request.app.state.thread_pool) |
88 | 88 | else:
|
89 | 89 | existing_ids = vector_store.get_filtered_ids(ids)
|
90 | 90 | documents = vector_store.get_documents_by_ids(ids)
|
@@ -118,11 +118,11 @@ async def get_documents_by_ids(ids: list[str] = Query(...)):
|
118 | 118 |
|
119 | 119 |
|
120 | 120 | @router.delete("/documents")
|
121 |
| -async def delete_documents(document_ids: List[str] = Body(...)): |
| 121 | +async def delete_documents(request: Request, document_ids: List[str] = Body(...)): |
122 | 122 | try:
|
123 | 123 | if isinstance(vector_store, AsyncPgVector):
|
124 |
| - existing_ids = await vector_store.get_filtered_ids(document_ids) |
125 |
| - await vector_store.delete(ids=document_ids) |
| 124 | + existing_ids = await vector_store.get_filtered_ids(document_ids, executor=request.app.state.thread_pool) |
| 125 | + await vector_store.delete(ids=document_ids, executor=request.app.state.thread_pool) |
126 | 126 | else:
|
127 | 127 | existing_ids = vector_store.get_filtered_ids(document_ids)
|
128 | 128 | vector_store.delete(ids=document_ids)
|
@@ -175,12 +175,11 @@ async def query_embeddings_by_file_id(
|
175 | 175 | embedding = get_cached_query_embedding(body.query)
|
176 | 176 |
|
177 | 177 | if isinstance(vector_store, AsyncPgVector):
|
178 |
| - documents = await run_in_executor( |
179 |
| - None, |
180 |
| - vector_store.similarity_search_with_score_by_vector, |
| 178 | + documents = await vector_store.asimilarity_search_with_score_by_vector( |
181 | 179 | embedding,
|
182 | 180 | k=body.k,
|
183 | 181 | filter={"file_id": body.file_id},
|
| 182 | + executor=request.app.state.thread_pool |
184 | 183 | )
|
185 | 184 | else:
|
186 | 185 | documents = vector_store.similarity_search_with_score_by_vector(
|
@@ -246,6 +245,7 @@ async def store_data_in_vector_db(
|
246 | 245 | file_id: str,
|
247 | 246 | user_id: str = "",
|
248 | 247 | clean_content: bool = False,
|
| 248 | + executor = None, |
249 | 249 | ) -> bool:
|
250 | 250 | text_splitter = RecursiveCharacterTextSplitter(
|
251 | 251 | chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
|
@@ -274,7 +274,7 @@ async def store_data_in_vector_db(
|
274 | 274 | try:
|
275 | 275 | if isinstance(vector_store, AsyncPgVector):
|
276 | 276 | ids = await vector_store.aadd_documents(
|
277 |
| - docs, ids=[file_id] * len(documents) |
| 277 | + docs, ids=[file_id] * len(documents), executor=executor |
278 | 278 | )
|
279 | 279 | else:
|
280 | 280 | ids = vector_store.add_documents(docs, ids=[file_id] * len(documents))
|
@@ -312,9 +312,9 @@ async def embed_local_file(
|
312 | 312 | loader, known_type, file_ext = get_loader(
|
313 | 313 | document.filename, document.file_content_type, document.filepath
|
314 | 314 | )
|
315 |
| - data = await run_in_executor(None, loader.load) |
| 315 | + data = await run_in_executor(request.app.state.thread_pool, loader.load) |
316 | 316 | result = await store_data_in_vector_db(
|
317 |
| - data, document.file_id, user_id, clean_content=file_ext == "pdf" |
| 317 | + data, document.file_id, user_id, clean_content=file_ext == "pdf", executor=request.app.state.thread_pool |
318 | 318 | )
|
319 | 319 |
|
320 | 320 | if result:
|
@@ -390,9 +390,9 @@ async def embed_file(
|
390 | 390 | loader, known_type, file_ext = get_loader(
|
391 | 391 | file.filename, file.content_type, temp_file_path
|
392 | 392 | )
|
393 |
| - data = await run_in_executor(None, loader.load) |
| 393 | + data = await run_in_executor(request.app.state.thread_pool, loader.load) |
394 | 394 | result = await store_data_in_vector_db(
|
395 |
| - data=data, file_id=file_id, user_id=user_id, clean_content=file_ext == "pdf" |
| 395 | + data=data, file_id=file_id, user_id=user_id, clean_content=file_ext == "pdf", executor=request.app.state.thread_pool |
396 | 396 | )
|
397 | 397 |
|
398 | 398 | if not result:
|
@@ -454,12 +454,12 @@ async def embed_file(
|
454 | 454 |
|
455 | 455 |
|
456 | 456 | @router.get("/documents/{id}/context")
|
457 |
| -async def load_document_context(id: str): |
| 457 | +async def load_document_context(request: Request, id: str): |
458 | 458 | ids = [id]
|
459 | 459 | try:
|
460 | 460 | if isinstance(vector_store, AsyncPgVector):
|
461 |
| - existing_ids = await vector_store.get_filtered_ids(ids) |
462 |
| - documents = await vector_store.get_documents_by_ids(ids) |
| 461 | + existing_ids = await vector_store.get_filtered_ids(ids, executor=request.app.state.thread_pool) |
| 462 | + documents = await vector_store.get_documents_by_ids(ids, executor=request.app.state.thread_pool) |
463 | 463 | else:
|
464 | 464 | existing_ids = vector_store.get_filtered_ids(ids)
|
465 | 465 | documents = vector_store.get_documents_by_ids(ids)
|
@@ -525,9 +525,9 @@ async def embed_file_upload(
|
525 | 525 | uploaded_file.filename, uploaded_file.content_type, temp_file_path
|
526 | 526 | )
|
527 | 527 |
|
528 |
| - data = await run_in_executor(None, loader.load) |
| 528 | + data = await run_in_executor(request.app.state.thread_pool, loader.load) |
529 | 529 | result = await store_data_in_vector_db(
|
530 |
| - data, file_id, user_id, clean_content=file_ext == "pdf" |
| 530 | + data, file_id, user_id, clean_content=file_ext == "pdf", executor=request.app.state.thread_pool |
531 | 531 | )
|
532 | 532 |
|
533 | 533 | if not result:
|
@@ -566,19 +566,18 @@ async def embed_file_upload(
|
566 | 566 |
|
567 | 567 |
|
568 | 568 | @router.post("/query_multiple")
|
569 |
| -async def query_embeddings_by_file_ids(body: QueryMultipleBody): |
| 569 | +async def query_embeddings_by_file_ids(request: Request, body: QueryMultipleBody): |
570 | 570 | try:
|
571 | 571 | # Get the embedding of the query text
|
572 | 572 | embedding = get_cached_query_embedding(body.query)
|
573 | 573 |
|
574 | 574 | # Perform similarity search with the query embedding and filter by the file_ids in metadata
|
575 | 575 | if isinstance(vector_store, AsyncPgVector):
|
576 |
| - documents = await run_in_executor( |
577 |
| - None, |
578 |
| - vector_store.similarity_search_with_score_by_vector, |
| 576 | + documents = await vector_store.asimilarity_search_with_score_by_vector( |
579 | 577 | embedding,
|
580 | 578 | k=body.k,
|
581 | 579 | filter={"file_id": {"$in": body.file_ids}},
|
| 580 | + executor=request.app.state.thread_pool |
582 | 581 | )
|
583 | 582 | else:
|
584 | 583 | documents = vector_store.similarity_search_with_score_by_vector(
|
|
0 commit comments