Skip to content

Commit 33a221a

Browse files
authored
🚅 refactor: Implement Async Support for Embedding Routes (#160)
1 parent 34ecc17 commit 33a221a

File tree

2 files changed

+12
-29
lines changed

2 files changed

+12
-29
lines changed

app/routes/document_routes.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -309,11 +309,13 @@ async def embed_local_file(
309309
user_id = entity_id if entity_id else request.state.user.get("id")
310310

311311
try:
312-
loader, known_type = get_loader(
312+
loader, known_type, file_ext = get_loader(
313313
document.filename, document.file_content_type, document.filepath
314314
)
315-
data = loader.load()
316-
result = await store_data_in_vector_db(data, document.file_id, user_id)
315+
data = await run_in_executor(None, loader.load)
316+
result = await store_data_in_vector_db(
317+
data, document.file_id, user_id, clean_content=file_ext == "pdf"
318+
)
317319

318320
if result:
319321
return {
@@ -388,7 +390,7 @@ async def embed_file(
388390
loader, known_type, file_ext = get_loader(
389391
file.filename, file.content_type, temp_file_path
390392
)
391-
data = loader.load()
393+
data = await run_in_executor(None, loader.load)
392394
result = await store_data_in_vector_db(
393395
data=data, file_id=file_id, user_id=user_id, clean_content=file_ext == "pdf"
394396
)
@@ -519,12 +521,14 @@ async def embed_file_upload(
519521
)
520522

521523
try:
522-
loader, known_type = get_loader(
524+
loader, known_type, file_ext = get_loader(
523525
uploaded_file.filename, uploaded_file.content_type, temp_file_path
524526
)
525527

526-
data = loader.load()
527-
result = await store_data_in_vector_db(data, file_id, user_id)
528+
data = await run_in_executor(None, loader.load)
529+
result = await store_data_in_vector_db(
530+
data, file_id, user_id, clean_content=file_ext == "pdf"
531+
)
528532

529533
if not result:
530534
raise HTTPException(

tests/test_main.py

Lines changed: 1 addition & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -116,17 +116,6 @@ def test_embed_local_file(tmp_path, auth_headers, monkeypatch):
116116
test_file = tmp_path / "test.txt"
117117
test_file.write_text("This is a test document.")
118118

119-
# Override get_loader on the document_routes module so that /local/embed
120-
# returns exactly 2 values (loader, known_type) and a loader that returns Document objects.
121-
from app.routes import document_routes
122-
def dummy_get_loader(filename, file_content_type, filepath):
123-
class DummyLoader:
124-
def load(self):
125-
# Return a list of Document objects.
126-
return [Document(page_content="Dummy document", metadata={})]
127-
return DummyLoader(), True
128-
monkeypatch.setattr(document_routes, "get_loader", dummy_get_loader)
129-
130119
data = {
131120
"filepath": str(test_file),
132121
"filename": "test.txt",
@@ -166,16 +155,6 @@ def test_embed_file_upload(tmp_path, auth_headers, monkeypatch):
166155
test_file = tmp_path / "upload_test.txt"
167156
test_file.write_text(file_content)
168157

169-
# Override get_loader on the document_routes module so that /embed-upload
170-
# returns exactly 2 values and a loader that returns Document objects.
171-
from app.routes import document_routes
172-
def dummy_get_loader(filename, file_content_type, filepath):
173-
class DummyLoader:
174-
def load(self):
175-
return [Document(page_content="Dummy document", metadata={})]
176-
return DummyLoader(), True
177-
monkeypatch.setattr(document_routes, "get_loader", dummy_get_loader)
178-
179158
with test_file.open("rb") as f:
180159
response = client.post(
181160
"/embed-upload",
@@ -200,4 +179,4 @@ def test_query_multiple(auth_headers):
200179
assert isinstance(json_data, list)
201180
if json_data:
202181
doc = json_data[0][0]
203-
assert doc["page_content"] == "Queried content"
182+
assert doc["page_content"] == "Queried content"

0 commit comments

Comments
 (0)