Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions lumen/ai/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,16 @@ class VectorStore(LLMUser):
doc="List of metadata keys to exclude when creating the embeddings.",
)

situate = param.Boolean(
situate = param.ClassSelector(
class_=(int, bool),
default=False,
doc="""
Whether to insert a `llm_context` key in the metadata containing
contextual about the chunks.""",
Controls whether to add a `llm_context` key to the metadata
with contextual information about the chunks.
If set to True, all chunks will be situated.
If set to a positive integer, only chunks with a character
count less than or equal to this value will be situated;
for example, `situate=500` skips chunks longer than 500 characters.
If set to False, no chunks will be situated.""",
Comment on lines +89 to +94
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like an example in the doc, because I do not fully understand what is being added

)

def __init__(self, **params):
Expand Down Expand Up @@ -204,14 +209,16 @@ def _chunk_text(self, text: str, chunk_size: int | None = None, chunk_func: Call

return chunks

async def should_situate_chunk(self, chunk: str) -> bool:
async def should_situate_chunk(self, chunk: str, max_characters: int | None = None) -> bool:
"""
Determine whether a chunk should be situated based on its content.

Parameters
----------
chunk: str
The chunk text to evaluate
max_characters: int | None
The maximum character count for situating chunks; a chunk longer
than this is never situated. If None, no length limit is applied.

Returns
-------
Expand All @@ -220,6 +227,8 @@ async def should_situate_chunk(self, chunk: str) -> bool:
"""
if not self.llm:
return self.situate
elif max_characters is not None and len(chunk) > max_characters:
return False

try:
# Use a user message to avoid conflicts with system instructions
Expand All @@ -235,7 +244,7 @@ async def add(
self,
items: list[dict],
force_ids: list[int] | None = None,
situate: bool | None = None,
situate: int | bool | None = None,
) -> list[int]:
"""
Add items to the vector store.
Expand All @@ -260,6 +269,10 @@ async def add(

# Use the provided situate parameter or fall back to the class default
use_situate = self.situate if situate is None else situate
if isinstance(use_situate, int) and use_situate > 0:
max_characters = use_situate
else:
max_characters = None

for item in items:
text = item["text"]
Expand All @@ -281,7 +294,7 @@ async def add(
if should_situate and self.llm:
previous_context = None # Start with no previous context
for chunk in content_chunks:
needs_context = await self.should_situate_chunk(chunk)
needs_context = await self.should_situate_chunk(chunk, max_characters)
if not needs_context:
continue

Expand Down
Loading