async def process_files(
    storage: StorageBase, skip_file_error: bool, **processor_kwargs: dict[str, Any]
) -> list[Document]:
+    """
+    Process files in storage.
+
+    This function takes a StorageBase and returns a list of LangChain documents.
+
+    Args:
+        storage (StorageBase): The storage containing the files to process.
+        skip_file_error (bool): Whether to skip files that cannot be processed.
+        processor_kwargs (dict[str, Any]): Additional arguments for the processor.
+
+    Returns:
+        list[Document]: List of processed documents in the LangChain Document format.
+
+    Raises:
+        ValueError: If a file cannot be processed and skip_file_error is False.
+        Exception: If no processor is found for a file of a specific type and skip_file_error is False.
+
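+    Example:
+        A minimal usage sketch; it assumes ``my_storage`` is a StorageBase
+        implementation that already contains the files to process.
+    ```python
+    # my_storage: hypothetical StorageBase instance with files already uploaded
+    documents = await process_files(storage=my_storage, skip_file_error=True)
+    print(f"Processed {len(documents)} documents")
+    ```
+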
+    """
+
    knowledge = []
    for file in await storage.get_files():
        try:
@@ -71,6 +89,36 @@ async def process_files(
class Brain:
+    """
+    A class representing a Brain.
+
+    This class allows for the creation of a Brain, which is a collection of knowledge one wants to retrieve information from.
+
+    A Brain is set up to:
+
+    * Store files in the storage of your choice (local, S3, etc.)
+    * Process the files in the storage to extract text and metadata in a wide range of formats.
+    * Store the processed files in the vector store of your choice (FAISS, PGVector, etc.) - defaults to FAISS.
+    * Create an index of the processed files.
+    * Use the *Quivr* workflow for retrieval augmented generation.
+
+    A Brain is able to:
+
+    * Search for information in the vector store.
+    * Answer questions about the knowledge in the Brain.
+    * Stream the answer to the question.
+
+    Attributes:
+        name (str): The name of the brain.
+        id (UUID): The unique identifier of the brain.
+        storage (StorageBase): The storage used to store the files.
+        llm (LLMEndpoint): The language model used to generate the answer.
+        vector_db (VectorStore): The vector store used to store the processed files.
+        embedder (Embeddings): The embeddings used to create the index of the processed files.
+
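+    Example:
+        A minimal end-to-end sketch; the file names and the question are
+        placeholders, and the default storage, vector store, LLM, and embedder
+        are assumed.
+    ```python
+    # "file1.pdf" and "file2.pdf" are placeholder paths
+    brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
+    answer = brain.ask("What is this document about?")
+    print(answer.answer)
+    ```
+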
+    """
+
    def __init__(
        self,
        *,
@@ -106,6 +154,22 @@ def print_info(self):
    @classmethod
    def load(cls, folder_path: str | Path) -> Self:
+        """
+        Load a brain from a folder path.
+
+        Args:
+            folder_path (str | Path): The path to the folder containing the brain.
+
+        Returns:
+            Brain: The brain loaded from the folder path.
+
+        Example:
+        ```python
+        brain_loaded = Brain.load("path/to/brain")
+        brain_loaded.print_info()
+        ```
+
+        """
        if isinstance(folder_path, str):
            folder_path = Path(folder_path)
        if not folder_path.exists():
@@ -154,6 +218,20 @@ def load(cls, folder_path: str | Path) -> Self:
        )

    async def save(self, folder_path: str | Path):
+        """
+        Save the brain to a folder path.
+
+        Args:
+            folder_path (str | Path): The path to the folder where the brain will be saved.
+
+        Returns:
+            str: The path to the folder where the brain was saved.
+
+        Example:
+        ```python
+        await brain.save("path/to/brain")
+        ```
+        """
        if isinstance(folder_path, str):
            folder_path = Path(folder_path)
@@ -247,6 +325,28 @@ async def afrom_files(
        skip_file_error: bool = False,
        processor_kwargs: dict[str, Any] | None = None,
    ):
+        """
+        Create a brain from a list of file paths.
+
+        Args:
+            name (str): The name of the brain.
+            file_paths (list[str | Path]): The list of file paths to add to the brain.
+            vector_db (VectorStore | None): The vector store used to store the processed files.
+            storage (StorageBase): The storage used to store the files.
+            llm (LLMEndpoint | None): The language model used to generate the answer.
+            embedder (Embeddings | None): The embeddings used to create the index of the processed files.
+            skip_file_error (bool): Whether to skip files that cannot be processed.
+            processor_kwargs (dict[str, Any] | None): Additional arguments for the processor.
+
+        Returns:
+            Brain: The brain created from the file paths.
+
+        Example:
+        ```python
+        brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
+        brain.print_info()
+        ```
+        """
        if llm is None:
            llm = default_llm()
@@ -327,6 +427,28 @@ async def afrom_langchain_documents(
        llm: LLMEndpoint | None = None,
        embedder: Embeddings | None = None,
    ) -> Self:
+        """
+        Create a brain from a list of LangChain documents.
+
+        Args:
+            name (str): The name of the brain.
+            langchain_documents (list[Document]): The list of LangChain documents to add to the brain.
+            vector_db (VectorStore | None): The vector store used to store the processed files.
+            storage (StorageBase): The storage used to store the files.
+            llm (LLMEndpoint | None): The language model used to generate the answer.
+            embedder (Embeddings | None): The embeddings used to create the index of the processed files.
+
+        Returns:
+            Brain: The brain created from the LangChain documents.
+
+        Example:
+        ```python
+        from langchain_core.documents import Document
+        documents = [Document(page_content="Hello, world!")]
+        brain = await Brain.afrom_langchain_documents(name="My Brain", langchain_documents=documents)
+        brain.print_info()
+        ```
+        """
        if llm is None:
            llm = default_llm()
@@ -357,6 +479,26 @@ async def asearch(
        filter: Callable | Dict[str, Any] | None = None,
        fetch_n_neighbors: int = 20,
    ) -> list[SearchResult]:
+        """
+        Search for relevant documents in the brain based on a query.
+
+        Args:
+            query (str | Document): The query to search for.
+            n_results (int): The number of results to return.
+            filter (Callable | Dict[str, Any] | None): The filter to apply to the search.
+            fetch_n_neighbors (int): The number of neighbors to fetch.
+
+        Returns:
+            list[SearchResult]: The list of retrieved chunks.
+
+        Example:
+        ```python
+        brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
+        results = await brain.asearch("Why everybody loves Quivr?")
+        for result in results:
+            print(result.chunk.page_content)
+        ```
+        """
        if not self.vector_db:
            raise ValueError("No vector db configured for this brain")
@@ -383,6 +525,26 @@ def ask(
        list_files: list[QuivrKnowledge] | None = None,
        chat_history: ChatHistory | None = None,
    ) -> ParsedRAGResponse:
+        """
+        Ask a question to the brain and get a generated answer.
+
+        Args:
+            question (str): The question to ask.
+            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            chat_history (ChatHistory | None): The chat history to use.
+
+        Returns:
+            ParsedRAGResponse: The generated answer.
+
+        Example:
+        ```python
+        brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
+        answer = brain.ask("What is the meaning of life?")
+        print(answer.answer)
+        ```
+        """
        llm = self.llm

        # If you passed a different llm model we'll override the brain one
@@ -420,6 +582,27 @@ async def ask_streaming(
        list_files: list[QuivrKnowledge] | None = None,
        chat_history: ChatHistory | None = None,
    ) -> AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]:
+        """
+        Ask a question to the brain and get a streamed generated answer.
+
+        Args:
+            question (str): The question to ask.
+            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            chat_history (ChatHistory | None): The chat history to use.
+
+        Returns:
+            AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]: The streamed generated answer.
+
+        Example:
+        ```python
+        brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
+        async for chunk in brain.ask_streaming("What is the meaning of life?"):
+            print(chunk.answer)
+        ```
+
+        """
        llm = self.llm

        # If you passed a different llm model we'll override the brain one