Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/installation/python-installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ messages = build_initial_ask_messages(
initial_user_prompt=question,
file_paths=None,
tool_executor=ai.tool_executor,
investigation_id=ai.investigation_id,
runbooks=config.get_runbook_catalog(),
system_prompt_additions=None
)
Expand Down Expand Up @@ -129,6 +130,7 @@ def main():
initial_user_prompt=question,
file_paths=None,
tool_executor=ai.tool_executor,
investigation_id=ai.investigation_id,
runbooks=config.get_runbook_catalog(),
system_prompt_additions=None
)
Expand Down Expand Up @@ -222,6 +224,7 @@ def main():
initial_user_prompt=first_question,
file_paths=None,
tool_executor=ai.tool_executor,
investigation_id=ai.investigation_id,
runbooks=config.get_runbook_catalog(),
system_prompt_additions=None
)
Expand Down
2 changes: 1 addition & 1 deletion examples/custom_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def ask_holmes():
)

tool_executor = ToolExecutor(load_builtin_toolsets())
ai = ToolCallingLLM(tool_executor, max_steps=10, llm=MyCustomLLM())
ai = ToolCallingLLM(tool_executor, max_steps=40, llm=MyCustomLLM())

response = ai.prompt_call(system_prompt, prompt)

Expand Down
2 changes: 1 addition & 1 deletion holmes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class Config(RobustaBaseConfig):
None # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
)
model: Optional[str] = "gpt-4o"
max_steps: int = 10
max_steps: int = 40
cluster_name: Optional[str] = None

alertmanager_url: Optional[str] = None
Expand Down
11 changes: 11 additions & 0 deletions holmes/core/conversations.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def build_issue_chat_messages(
"issue": issue_chat_request.issue_type,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
},
)
messages = [
Expand All @@ -153,6 +154,7 @@ def build_issue_chat_messages(
"issue": issue_chat_request.issue_type,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
Expand Down Expand Up @@ -186,6 +188,7 @@ def build_issue_chat_messages(
"issue": issue_chat_request.issue_type,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, truncated_template_context
Expand Down Expand Up @@ -227,6 +230,7 @@ def build_issue_chat_messages(
"issue": issue_chat_request.issue_type,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
Expand All @@ -250,6 +254,7 @@ def build_issue_chat_messages(
"issue": issue_chat_request.issue_type,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, template_context
Expand All @@ -274,6 +279,7 @@ def add_or_update_system_prompt(
context = {
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}

system_prompt = load_and_render_prompt(template_path, context)
Expand Down Expand Up @@ -465,6 +471,7 @@ def build_workload_health_chat_messages(
"resource": resource,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
},
)
messages = [
Expand All @@ -485,6 +492,7 @@ def build_workload_health_chat_messages(
"resource": resource,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
Expand Down Expand Up @@ -518,6 +526,7 @@ def build_workload_health_chat_messages(
"resource": resource,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, truncated_template_context
Expand Down Expand Up @@ -559,6 +568,7 @@ def build_workload_health_chat_messages(
"resource": resource,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
Expand All @@ -582,6 +592,7 @@ def build_workload_health_chat_messages(
"resource": resource,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"investigation_id": ai.investigation_id,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, template_context
Expand Down
6 changes: 6 additions & 0 deletions holmes/core/investigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from holmes.core.tracing import DummySpan, SpanType
from holmes.utils.global_instructions import add_global_instructions_to_user_prompt
from holmes.utils.robusta import load_robusta_api_key
from holmes.core.todo_manager import get_todo_manager

from holmes.core.investigation_structured_output import (
DEFAULT_SECTIONS,
Expand Down Expand Up @@ -133,6 +134,9 @@ def get_investigation_context(
else:
logging.info("Structured output is disabled for this request")

todo_manager = get_todo_manager()
todo_context = todo_manager.format_tasks_for_prompt(ai.investigation_id)

system_prompt = load_and_render_prompt(
investigate_request.prompt_template,
{
Expand All @@ -141,6 +145,8 @@ def get_investigation_context(
"structured_output": request_structured_output_from_llm,
"toolsets": ai.tool_executor.toolsets,
"cluster_name": config.cluster_name,
"todo_list": todo_context,
"investigation_id": ai.investigation_id,
},
)

Expand Down
27 changes: 23 additions & 4 deletions holmes/core/openai_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,26 @@ def type_to_open_ai_schema(param_attributes: Any, strict_mode: bool) -> dict[str
type_obj = {"type": "object"}
if strict_mode:
type_obj["additionalProperties"] = False

# Use explicit properties if provided
if hasattr(param_attributes, "properties") and param_attributes.properties:
type_obj["properties"] = {
name: type_to_open_ai_schema(prop, strict_mode)
for name, prop in param_attributes.properties.items()
}
if strict_mode:
type_obj["required"] = list(param_attributes.properties.keys())

elif param_type == "array":
# Handle arrays with explicit item schemas
if hasattr(param_attributes, "items") and param_attributes.items:
items_schema = type_to_open_ai_schema(param_attributes.items, strict_mode)
type_obj = {"type": "array", "items": items_schema}
else:
# Fallback for arrays without explicit item schema
type_obj = {"type": "array", "items": {"type": "object"}}
if strict_mode:
type_obj["items"]["additionalProperties"] = False
else:
match = re.match(pattern, param_type)

Expand All @@ -33,10 +53,9 @@ def type_to_open_ai_schema(param_attributes: Any, strict_mode: bool) -> dict[str
if match.group("inner_type"):
inner_type = match.group("inner_type")
if inner_type == "object":
items_obj: dict[str, Any] = {"type": "object"}
if strict_mode:
items_obj["additionalProperties"] = False
type_obj = {"type": "array", "items": items_obj}
raise ValueError(
"object inner type must have schema. Use ToolParameter.items"
)
else:
type_obj = {"type": "array", "items": {"type": inner_type}}
else:
Expand Down
13 changes: 13 additions & 0 deletions holmes/core/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,22 @@ def append_all_files_to_user_prompt(
return user_prompt


def get_tasks_management_system_reminder() -> str:
    """Return the <system-reminder> block appended to the user prompt.

    The reminder instructs the model to plan its investigation with the
    TodoWrite tool: break the question into sub-problems BEFORE calling any
    other tool, then keep the TodoList updated after every tool call.

    Fixes over the previous revision: the implicit string concatenation was
    missing a separator after "question." (producing "question.Do this"),
    numbered item "3." was empty, and "your self" was a typo.
    """
    return (
        "\n\n<system-reminder>\n"
        "IMPORTANT: You have access to the TodoWrite tool. It creates a TodoList, "
        "in order to track progress. It's very important. You MUST use it:\n"
        # Step 1 must happen before any other tool call so the model plans first.
        "1. FIRST: Ask yourself which sub problems you need to solve in order to "
        "answer the question. Do this, BEFORE any other tools\n"
        "2. AFTER EVERY TOOL CALL: If required, update the TodoList\n"
        "\nFAILURE TO UPDATE TodoList = INCOMPLETE INVESTIGATION\n\n"
        "Example flow:\n"
        "- Think and divide to sub problems → create TodoList → "
        "Perform each task on the list → Update list → Verify your solution"
        "\n</system-reminder>"
    )


def build_initial_ask_messages(
console: Console,
initial_user_prompt: str,
file_paths: Optional[List[Path]],
tool_executor: Any, # ToolExecutor type
investigation_id: str,
runbooks: Union[RunbookCatalog, Dict, None] = None,
system_prompt_additions: Optional[str] = None,
) -> List[Dict]:
Expand All @@ -49,6 +60,7 @@ def build_initial_ask_messages(
"toolsets": tool_executor.toolsets,
"runbooks": runbooks or {},
"system_prompt_additions": system_prompt_additions or "",
"investigation_id": investigation_id,
}
system_prompt_rendered = load_and_render_prompt(
system_prompt_template, template_context
Expand All @@ -59,6 +71,7 @@ def build_initial_ask_messages(
console, initial_user_prompt, file_paths
)

user_prompt_with_files += get_tasks_management_system_reminder()
messages = [
{"role": "system", "content": system_prompt_rendered},
{"role": "user", "content": user_prompt_with_files},
Expand Down
88 changes: 88 additions & 0 deletions holmes/core/todo_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from typing import Dict, List
from threading import Lock

from holmes.plugins.toolsets.investigator.model import Task, TaskStatus


class TodoListManager:
    """
    Thread-safe, in-memory store of per-investigation TodoLists.

    Tasks are kept per session id; every read and write is serialized
    through a single lock, and callers always receive copies so outside
    mutation cannot corrupt the stored lists.
    """

    def __init__(self):
        # session id -> list of Task objects; guarded by self._lock
        self._sessions: Dict[str, List[Task]] = {}
        self._lock: Lock = Lock()

    def get_session_tasks(self, session_id: str) -> List[Task]:
        """Return a shallow copy of the tasks for *session_id* ([] if unknown)."""
        with self._lock:
            return list(self._sessions.get(session_id, []))

    def update_session_tasks(self, session_id: str, tasks: List[Task]) -> None:
        """Replace the task list stored for *session_id* with a copy of *tasks*."""
        with self._lock:
            self._sessions[session_id] = list(tasks)

    def clear_session(self, session_id: str) -> None:
        """Forget all tasks for *session_id*; silently ignores unknown sessions."""
        with self._lock:
            self._sessions.pop(session_id, None)

    def get_session_count(self) -> int:
        """Return the number of sessions that currently have stored tasks."""
        with self._lock:
            return len(self._sessions)

    def format_tasks_for_prompt(self, session_id: str) -> str:
        """
        Render the session's tasks as a markdown section for the system prompt.

        Returns an empty string when the session has no tasks, so callers can
        inject the result unconditionally.
        """
        tasks = self.get_session_tasks(session_id)
        if not tasks:
            return ""

        # Pending work is listed first, completed last; unknown statuses sink
        # to the bottom. sorted() is stable, so insertion order is preserved
        # within each status group.
        rank = {
            TaskStatus.PENDING: 0,
            TaskStatus.IN_PROGRESS: 1,
            TaskStatus.COMPLETED: 2,
        }
        ordered = sorted(tasks, key=lambda task: rank.get(task.status, 3))

        waiting = sum(task.status == TaskStatus.PENDING for task in tasks)
        active = sum(task.status == TaskStatus.IN_PROGRESS for task in tasks)
        done = sum(task.status == TaskStatus.COMPLETED for task in tasks)

        # Checkbox-style markers for each known status.
        markers = {
            TaskStatus.PENDING: "[ ]",
            TaskStatus.IN_PROGRESS: "[~]",
            TaskStatus.COMPLETED: "[✓]",
        }

        lines = [
            "# CURRENT INVESTIGATION TASKS",
            "",
            f"**Task Status**: {done} completed, {active} in progress, {waiting} pending",
            "",
        ]
        for task in ordered:
            lines.append(f"{markers.get(task.status, '[?]')} [{task.id}] {task.content}")
        lines.append("")
        lines.append(
            "**Instructions**: Use TodoWrite tool to update task status as you work. Mark tasks as 'in_progress' when starting, 'completed' when finished."
        )
        return "\n".join(lines)


# Process-wide singleton: all investigations in this process share one
# TodoListManager, keyed internally by investigation/session id.
_todo_manager = TodoListManager()


def get_todo_manager() -> TodoListManager:
    """Return the shared, process-wide TodoListManager instance."""
    return _todo_manager
10 changes: 10 additions & 0 deletions holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import textwrap
import uuid
from typing import Dict, List, Optional, Type, Union

import sentry_sdk
Expand Down Expand Up @@ -38,6 +39,9 @@
from holmes.core.tracing import DummySpan
from holmes.utils.colors import AI_COLOR
from holmes.utils.stream import StreamEvents, StreamMessage
from holmes.core.todo_manager import (
get_todo_manager,
)


def format_tool_result_data(tool_result: StructuredToolResult) -> str:
Expand Down Expand Up @@ -207,6 +211,7 @@ def __init__(
self.max_steps = max_steps
self.tracer = tracer
self.llm = llm
self.investigation_id = str(uuid.uuid4())

def prompt_call(
self,
Expand Down Expand Up @@ -780,6 +785,9 @@ def investigate(
"[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
)

todo_manager = get_todo_manager()
todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)

system_prompt = load_and_render_prompt(
prompt,
{
Expand All @@ -788,6 +796,8 @@ def investigate(
"structured_output": request_structured_output_from_llm,
"toolsets": self.tool_executor.toolsets,
"cluster_name": self.cluster_name,
"todo_list": todo_context,
"investigation_id": self.investigation_id,
},
)

Expand Down
2 changes: 2 additions & 0 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ class ToolParameter(BaseModel):
description: Optional[str] = None
type: str = "string"
required: bool = True
properties: Optional[Dict[str, "ToolParameter"]] = None # For object types
items: Optional["ToolParameter"] = None # For array item schemas


class Tool(ABC, BaseModel):
Expand Down
1 change: 1 addition & 0 deletions holmes/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,7 @@ def get_bottom_toolbar():
user_input,
include_files,
ai.tool_executor,
ai.investigation_id,
runbooks,
system_prompt_additions,
)
Expand Down
Loading
Loading