Skip to content

Commit 500d793

Browse files
authored
feat: add system_prompt parameter to Brain methods and enhance message handling in RAG pipeline (#3625)
# Description Please include a summary of the changes and the related issue. Please also include relevant motivation and context. ## Checklist before requesting a review Please delete options that are not relevant. - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented hard-to-understand areas - [ ] I have ideally added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged ## Screenshots (if appropriate):
1 parent e391817 commit 500d793

File tree

3 files changed

+72
-38
lines changed

3 files changed

+72
-38
lines changed

core/quivr_core/brain/brain.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,7 @@ def add_file(self) -> None:
496496
async def ask_streaming(
497497
self,
498498
question: str,
499+
system_prompt: str | None = None,
499500
retrieval_config: RetrievalConfig | None = None,
500501
rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
501502
list_files: list[QuivrKnowledge] | None = None,
@@ -542,6 +543,7 @@ async def ask_streaming(
542543
}
543544
async for response in rag_instance.answer_astream(
544545
question=question,
546+
system_prompt=system_prompt or None,
545547
history=chat_history,
546548
list_files=list_files,
547549
metadata=metadata,
@@ -560,6 +562,7 @@ async def ask_streaming(
560562
async def aask(
561563
self,
562564
question: str,
565+
system_prompt: str | None = None,
563566
retrieval_config: RetrievalConfig | None = None,
564567
rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
565568
list_files: list[QuivrKnowledge] | None = None,
@@ -582,6 +585,7 @@ async def aask(
582585

583586
async for response in self.ask_streaming(
584587
question=question,
588+
system_prompt=system_prompt,
585589
retrieval_config=retrieval_config,
586590
rag_pipeline=rag_pipeline,
587591
list_files=list_files,
@@ -595,6 +599,7 @@ async def aask(
595599
def ask(
596600
self,
597601
question: str,
602+
system_prompt: str | None = None,
598603
retrieval_config: RetrievalConfig | None = None,
599604
rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
600605
list_files: list[QuivrKnowledge] | None = None,
@@ -604,6 +609,7 @@ def ask(
604609
Fully synchronous version that asks a question to the brain and gets a generated answer.
605610
Args:
606611
question (str): The question to ask.
612+
system_prompt (str | None): The system prompt to use.
607613
retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
608614
rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
609615
list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
@@ -615,6 +621,7 @@ def ask(
615621
return loop.run_until_complete(
616622
self.aask(
617623
question=question,
624+
system_prompt=system_prompt,
618625
retrieval_config=retrieval_config,
619626
rag_pipeline=rag_pipeline,
620627
list_files=list_files,

core/quivr_core/rag/prompts.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -263,55 +263,69 @@ def _define_custom_prompts() -> dict[TemplatePromptName, BasePromptTemplate]:
263263
custom_prompts[TemplatePromptName.TOOL_ROUTING_PROMPT] = TOOL_ROUTING_PROMPT
264264

265265
system_message_zendesk_template = """
266-
- You are a Zendesk Agent.
267-
- You are answering a client query.
268-
- You must provide a response with all the information you have. Do not write areas to be filled like [your name], [your email], etc.
269-
- Do NOT invent information that was not present in previous tickets or in user metabadata or ticket metadata
270-
- Always prioritize information from the most recent tickets, espcially if they are contradictory.
271-
272-
273-
Here are instructions that you MUST follow:
274-
<instructions from me>
275-
{guidelines}
276-
</instructions from me>
266+
You are a Customer Service Agent using Zendesk. You are answering a client query.
267+
You will be provided with the users metadata, ticket metadata and ticket history which can be used to answer the query.
268+
You will also have access to the most relevant similar tickets and additional information sometimes such as API calls.
269+
Never add something in brackets that needs to be filled like [your name], [your email], etc.
270+
Do NOT invent information that was not present in previous tickets or in user metadata or ticket metadata or additional information.
271+
Always prioritize information from the most recent tickets, especially if they are contradictory.
277272
278273
279-
Here are default instructions that can be ignored if they are contradictory to the above instructions:
274+
Here are default instructions that can be ignored if they are contradictory to the <instructions from me> section:
280275
<default instructions>
281-
- Don't be too verbose, use the same length as in similar tickets.
276+
- Don't be too verbose, use the same amount of details as in similar tickets.
282277
- Use the same tone, format, structure and lexical field as in similar tickets agent responses.
283-
- Use paragraphs and sentences. The text must be readable and have well defined paragraphs (\\n\\n) or line breaks (\\n).
284-
- Always add the most relevant informations to the response, just like in similar tickets response so the user have all the informations needed.
285278
- Maintain consistency in terminology used in recent tickets.
286279
- Answer in the same language as the user.
280+
- Don't add a signature at the end of the answer, it will be added once the answer is sent.
287281
</default instructions>
288282
283+
284+
Here are instructions that you MUST follow and prioritize over the <default instructions> section:
285+
<instructions from me>
286+
{guidelines}
287+
</instructions from me>
288+
"""
289289

290+
user_prompt_template = """
290291
Here is information about the user that can help you answer:
292+
<user_metadata>
291293
{user_metadata}
294+
</user_metadata>
292295
293296
Here is metadata on the current ticket that can help you answer:
297+
<ticket_metadata>
294298
{ticket_metadata}
299+
</ticket_metadata>
295300
296301
297302
Here are the most relevant similar tickets that can help you to answer:
303+
<similar_tickets>
298304
{similar_tickets}
305+
</similar_tickets>
299306
300307
Here is the current ticket history:
308+
<ticket_history>
301309
{ticket_history}
310+
</ticket_history>
302311
312+
Here is additional information that can help you answer:
313+
<additional_information>
303314
{additional_information}
315+
</additional_information>
304316
305-
Here is the client question to which you must answer
317+
Here is the client question to which you must answer:
318+
<client_query>
306319
{client_query}
320+
</client_query>
307321
308-
Answer directly with the message to send to the customer, ready to be sent:
309-
Answer:
310-
"""
322+
Based on the information provided, answer directly with the message to send to the customer, ready to be sent:
323+
Answer:"""
311324

312-
ZENDESK_TEMPLATE_PROMPT = ChatPromptTemplate.from_messages(
325+
ZENDESK_TEMPLATE_PROMPT = ChatPromptTemplate(
313326
[
314-
SystemMessagePromptTemplate.from_template(system_message_zendesk_template),
327+
("system", system_message_zendesk_template),
328+
("user", user_prompt_template),
315329
]
316330
)
317331
custom_prompts[TemplatePromptName.ZENDESK_TEMPLATE_PROMPT] = ZENDESK_TEMPLATE_PROMPT

core/quivr_core/rag/quivr_rag_langgraph.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from langchain_community.document_compressors import JinaRerank
2222
from langchain_core.callbacks import Callbacks
2323
from langchain_core.documents import BaseDocumentCompressor, Document
24-
from langchain_core.messages import BaseMessage
24+
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
2525
from langchain_core.messages.ai import AIMessageChunk
2626
from langchain_core.prompts.base import BasePromptTemplate
2727
from langchain_core.runnables.schema import StreamEvent
@@ -970,7 +970,21 @@ def generate_chat_llm(self, state: AgentState) -> AgentState:
970970
dict: The updated state with re-phrased question
971971
"""
972972
messages = state["messages"]
973-
user_task = messages[0].content
973+
print(messages)
974+
975+
# Check if there is a system message in messages
976+
system_message = None
977+
user_message = None
978+
979+
for msg in messages:
980+
if isinstance(msg, SystemMessage):
981+
system_message = str(msg.content)
982+
elif isinstance(msg, HumanMessage):
983+
user_message = str(msg.content)
984+
985+
user_task = (
986+
user_message if user_message else (messages[0].content if messages else "")
987+
)
974988

975989
# Prompt
976990
prompt = self.retrieval_config.prompt
@@ -982,18 +996,18 @@ def generate_chat_llm(self, state: AgentState) -> AgentState:
982996

983997
# LLM
984998
llm = self.llm_endpoint._llm
985-
if state.get("enforced_system_prompt", None):
986-
final_inputs["enforced_system_prompt"] = state["enforced_system_prompt"]
987-
prompt = custom_prompts[TemplatePromptName.ZENDESK_LLM_PROMPT]
988999

989-
else:
990-
prompt = custom_prompts[TemplatePromptName.CHAT_LLM_PROMPT]
991-
state, reduced_inputs = self.reduce_rag_context(state, final_inputs, prompt)
992-
993-
msg = prompt.format(**reduced_inputs)
1000+
prompt = custom_prompts[TemplatePromptName.CHAT_LLM_PROMPT]
1001+
state, reduced_inputs = self.reduce_rag_context(
1002+
state, final_inputs, system_message if system_message else prompt
1003+
)
1004+
CHAT_LLM_PROMPT = [
1005+
SystemMessage(content=str(system_message)),
1006+
HumanMessage(content=str(user_message)),
1007+
]
9941008

9951009
# Run
996-
response = llm.invoke(msg)
1010+
response = llm.invoke(CHAT_LLM_PROMPT)
9971011
return {**state, "messages": [response]}
9981012

9991013
def build_chain(self):
@@ -1043,6 +1057,7 @@ def _add_node_edges(self, workflow: StateGraph, node: NodeConfig):
10431057
async def answer_astream(
10441058
self,
10451059
question: str,
1060+
system_prompt: str | None,
10461061
history: ChatHistory,
10471062
list_files: list[QuivrKnowledge],
10481063
metadata: dict[str, str] = {},
@@ -1059,15 +1074,13 @@ async def answer_astream(
10591074
rolling_message = AIMessageChunk(content="")
10601075
docs: list[Document] | None = None
10611076
previous_content = ""
1077+
system_prompt = system_prompt
1078+
messages = [("system", system_prompt)] if system_prompt else []
1079+
messages.append(("user", question))
10621080

10631081
async for event in conversational_qa_chain.astream_events(
10641082
{
1065-
"messages": [
1066-
(
1067-
"user",
1068-
question,
1069-
)
1070-
],
1083+
"messages": messages,
10711084
"chat_history": history,
10721085
"files": concat_list_files,
10731086
**input_kwargs,

0 commit comments

Comments
 (0)