robusta-dev · nherment · Oct 2, 2025 · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025
diff --git a/holmes/core/models.py b/holmes/core/models.py
@@ -28,17 +28,11 @@ class ToolCallResult(BaseModel):
     size: Optional[int] = None
 
     def as_tool_call_message(self):
-        content = format_tool_result_data(self.result)
-        if self.result.params:
-            content = (
-                f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
-                + content
-            )
         return {
             "tool_call_id": self.tool_call_id,
             "role": "tool",
             "name": self.tool_name,
-            "content": content,
+            "content": format_tool_result_data(self.result),  # the "Params used for the tool call" preamble is now prepended inside format_tool_result_data
         }
 
     def as_tool_result_response(self):
@@ -80,6 +74,12 @@ def format_tool_result_data(tool_result: StructuredToolResult) -> str:
             tool_response = str(tool_result.data)
     if tool_result.status == StructuredToolResultStatus.ERROR:
         tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
+
+    if tool_result.params:
+        tool_response = (
+            f"Params used for the tool call: {json.dumps(tool_result.params)}. The tool call output follows on the next line.\n"
+            + tool_response
+        )
     return tool_response
 
 

diff --git a/holmes/core/tool_calling_llm.py b/holmes/core/tool_calling_llm.py
@@ -39,8 +39,13 @@
 from holmes.core.resource_instruction import ResourceInstructions
 from holmes.core.runbooks import RunbookManager
 from holmes.core.safeguards import prevent_overly_repeated_tool_call
-from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
+from holmes.core.tools import (
+    StructuredToolResult,
+    StructuredToolResultStatus,
+    ToolInvokeContext,
+)
 from holmes.core.tools_utils.tool_context_window_limiter import (
+    get_max_token_count_for_single_tool,
     prevent_overly_big_tool_response,
 )
 from holmes.plugins.prompts import load_and_render_prompt
@@ -622,9 +627,13 @@ def _directly_invoke_tool_call(
             )
 
         try:
-            tool_response = tool.invoke(
-                tool_params, tool_number=tool_number, user_approved=user_approved
+            invoke_context = ToolInvokeContext(
+                tool_number=tool_number,
+                user_approved=user_approved,
+                llm=self.llm,
+                max_token_count=get_max_token_count_for_single_tool(self.llm),
             )
+            tool_response = tool.invoke(tool_params, context=invoke_context)
         except Exception as e:
             logging.error(
                 f"Tool call to {tool_name} failed with an Exception", exc_info=True

diff --git a/holmes/core/tools.py b/holmes/core/tools.py
@@ -31,6 +31,7 @@
 )
 from rich.console import Console
 
+from holmes.core.llm import LLM
 from holmes.core.openai_formatting import format_tool_to_open_ai_standard
 from holmes.plugins.prompts import load_and_render_prompt
 from holmes.core.transformers import (
@@ -159,6 +160,15 @@ class ToolParameter(BaseModel):
     items: Optional["ToolParameter"] = None  # For array item schemas
 
 
+class ToolInvokeContext(BaseModel):  # per-call context handed to Tool.invoke() and forwarded unchanged to Tool._invoke()
+    model_config = ConfigDict(arbitrary_types_allowed=True)  # needed for the `llm: LLM` field — presumably LLM is not a pydantic type; confirm
+
+    tool_number: Optional[int] = None  # used by Tool.invoke() for the "#N " log prefix and the "/show N" hint
+    user_approved: bool = False  # NOTE(review): previously a keyword argument of invoke()/_invoke(); its consumers are not visible here
+    llm: LLM  # required; _directly_invoke_tool_call passes self.llm
+    max_token_count: int  # required; _directly_invoke_tool_call passes get_max_token_count_for_single_tool(self.llm)
+
+
 class Tool(ABC, BaseModel):
     name: str
     description: str
@@ -225,15 +235,14 @@ def get_openai_format(self, target_model: str):
     def invoke(
         self,
         params: Dict,
-        tool_number: Optional[int] = None,
-        user_approved: bool = False,
+        context: ToolInvokeContext,
     ) -> StructuredToolResult:
-        tool_number_str = f"#{tool_number} " if tool_number else ""
+        tool_number_str = f"#{context.tool_number} " if context.tool_number else ""
         logger.info(
             f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
         )
         start_time = time.time()
-        result = self._invoke(params=params, user_approved=user_approved)
+        result = self._invoke(params=params, context=context)
         result.icon_url = self.icon_url
 
         # Apply transformers to the result
@@ -244,7 +253,7 @@ def invoke(
             if hasattr(transformed_result, "get_stringified_data")
             else str(transformed_result)
         )
-        show_hint = f"/show {tool_number}" if tool_number else "/show"
+        show_hint = f"/show {context.tool_number}" if context.tool_number else "/show"
         line_count = output_str.count("\n") + 1 if output_str else 0
         logger.info(
             f"  [dim]Finished {tool_number_str}in {elapsed:.2f}s, output length: {len(output_str):,} characters ({line_count:,} lines) - {show_hint} to view contents[/dim]"
@@ -340,7 +349,9 @@ def _apply_transformers(self, result: StructuredToolResult) -> StructuredToolRes
 
     @abstractmethod
     def _invoke(
-        self, params: dict, user_approved: bool = False
+        self,
+        params: dict,
+        context: ToolInvokeContext,
     ) -> StructuredToolResult:
         """
         params: the tool params
@@ -400,7 +411,9 @@ def _get_status(
         return StructuredToolResultStatus.SUCCESS
 
     def _invoke(
-        self, params: dict, user_approved: bool = False
+        self,
+        params: dict,
+        context: ToolInvokeContext,
     ) -> StructuredToolResult:
         if self.command is not None:
             raw_output, return_code, invocation = self.__invoke_command(params)

diff --git a/holmes/core/tools_utils/token_counting.py b/holmes/core/tools_utils/token_counting.py
@@ -0,0 +1,13 @@
+from holmes.core.llm import LLM
+from holmes.core.models import format_tool_result_data
+from holmes.core.tools import StructuredToolResult
+
+
+def count_tool_response_tokens(
+    llm: LLM, structured_tool_result: StructuredToolResult
+) -> int:
+    """Return the token count, as measured by `llm`, of the result rendered as a tool-role message."""
+    rendered = format_tool_result_data(structured_tool_result)
+    # Only role and content are counted; no tool_call_id or name is attached to the message.
+    tool_message = {"role": "tool", "content": rendered}
+    return llm.count_tokens_for_message([tool_message])
diff --git a/holmes/core/tools_utils/tool_context_window_limiter.py b/holmes/core/tools_utils/tool_context_window_limiter.py
@@ -1,33 +1,55 @@
+from typing import Optional
 from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
 from holmes.core.llm import LLM
 from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
 from holmes.utils import sentry_helper
 
 
+def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
+    """Return that percentage of the LLM context window in tokens, or the whole window if the percentage is outside (0, 100]."""
+    window_tokens = llm.get_context_window_size()
+    if not 0 < percent_of_total_context_window <= 100:
+        return window_tokens
+    # Floor-divide before int() so the budget is rounded down to whole tokens.
+    return int(window_tokens * percent_of_total_context_window // 100)
+
+
+def get_max_token_count_for_single_tool(llm: LLM) -> int:
+    """Return the token budget for one tool response, derived from TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT."""
+    single_tool_pct = TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
+    return get_pct_token_count(percent_of_total_context_window=single_tool_pct, llm=llm)
+
+
 def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
-    if (
-        tool_call_result.result.status == StructuredToolResultStatus.SUCCESS
-        and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
-        and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
-    ):
-        message = tool_call_result.as_tool_call_message()
-
-        messages_token = llm.count_tokens_for_message(messages=[message])
-        context_window_size = llm.get_context_window_size()
-        max_tokens_allowed: int = int(
-            context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
+    max_tokens_allowed = get_max_token_count_for_single_tool(llm)  # equals the full context window when TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT is outside (0, 100]
+    # Unlike the removed code, the size check now runs for every result status, not only SUCCESS.
+    message = tool_call_result.as_tool_call_message()
+    messages_token = llm.count_tokens_for_message(messages=[message])
+
+    if messages_token > max_tokens_allowed:
+        relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
+        # Default message, kept when the oversized result is neither NO_DATA nor ERROR; overridden below otherwise.
+        error_message: Optional[str] = (
+            f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
         )
 
-        if messages_token > max_tokens_allowed:
-            relative_pct = (
-                (messages_token - max_tokens_allowed) / messages_token
-            ) * 100
-            error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
-            tool_call_result.result.status = StructuredToolResultStatus.ERROR
-            tool_call_result.result.data = None
-            tool_call_result.result.error = error_message
-
-            sentry_helper.capture_toolcall_contains_too_many_tokens(
-                tool_call_result, messages_token, max_tokens_allowed
+        if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
+            error_message = None
+            # data is cleared below, which is expected to fix the size issue. NOTE(review): status is still overwritten with ERROR (and error=None) below — confirm this is intended for NO_DATA results.
+        elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
+            original_error = (
+                tool_call_result.result.error
+                or tool_call_result.result.data
+                or "Unknown error"
             )
+            truncated_error = str(original_error)[:100]  # keep only the first 100 characters of the original error
+            error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"
+        # The result is mutated in place: the payload is dropped and status/error are replaced.
+        tool_call_result.result.status = StructuredToolResultStatus.ERROR
+        tool_call_result.result.data = None
+        tool_call_result.result.error = error_message
+
+        sentry_helper.capture_toolcall_contains_too_many_tokens(
+            tool_call_result, messages_token, max_tokens_allowed
+        )
diff --git a/holmes/core/tools_utils/tool_executor.py b/holmes/core/tools_utils/tool_executor.py
@@ -9,6 +9,7 @@
     StructuredToolResultStatus,
     Toolset,
     ToolsetStatusEnum,
+    ToolInvokeContext,
 )
 from holmes.core.tools_utils.toolset_utils import filter_out_default_logging_toolset
 
@@ -46,16 +47,20 @@ def __init__(self, toolsets: List[Toolset]):
                     )
                 self.tools_by_name[tool.name] = tool
 
-    def invoke(self, tool_name: str, params: dict) -> StructuredToolResult:
+    def invoke(
+        self, tool_name: str, params: dict, context: ToolInvokeContext
+    ) -> StructuredToolResult:
+        """Invoke the tool registered under `tool_name` with `params` and `context`; return an ERROR result if no such tool is found.
+        TODO: remove this function as it seems unused: callers use tool_executor.get_tool_by_name() and then tool.invoke() directly.
+        """
         tool = self.get_tool_by_name(tool_name)
-        return (
-            tool.invoke(params)
-            if tool
-            else StructuredToolResult(
+        if not tool:
+            return StructuredToolResult(
                 status=StructuredToolResultStatus.ERROR,
                 error=f"Could not find tool named {tool_name}",
             )
-        )
+        # context is now a required argument of Tool.invoke(); it is passed positionally here.
+        return tool.invoke(params, context)
 
     def get_tool_by_name(self, name: str) -> Optional[Tool]:
         if name in self.tools_by_name:

diff --git a/holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py b/holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py
@@ -4,6 +4,7 @@
 from holmes.core.tools import (
     CallablePrerequisite,
     Tool,
+    ToolInvokeContext,
     ToolParameter,
     Toolset,
     ToolsetTag,
@@ -118,9 +119,7 @@ def get_parameterized_one_liner(self, params) -> str:
         project_id = self.toolset.config.get("project_id", "")
         return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Alerts ({project_id})"
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             url = "https://cloud.mongodb.com/api/atlas/v2/groups/{project_id}/alerts".format(
                 project_id=self.toolset.config.get("project_id")
@@ -145,9 +144,7 @@ def get_parameterized_one_liner(self, params) -> str:
         project_id = self.toolset.config.get("project_id", "")
         return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Processes ({project_id})"
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             url = "https://cloud.mongodb.com/api/atlas/v2/groups/{project_id}/processes".format(
                 project_id=self.toolset.config.get("project_id")
@@ -180,9 +177,7 @@ def get_parameterized_one_liner(self, params) -> str:
         process_id = params.get("process_id", "")
         return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Slow Queries ({process_id})"
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             url = self.url.format(
                 project_id=self.toolset.config.get("project_id"),
@@ -209,9 +204,7 @@ def get_parameterized_one_liner(self, params) -> str:
         project_id = self.toolset.config.get("project_id", "")
         return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Events ({project_id})"
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         params.update({"itemsPerPage": 500})
         try:
             now_utc = datetime.now(timezone.utc)
@@ -268,9 +261,7 @@ def get_parameterized_one_liner(self, params) -> str:
         hostname = params.get("hostName", "")
         return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Host Logs ({hostname})"
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         one_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
         try:
             url = self.url.format(
@@ -324,9 +315,7 @@ def get_parameterized_one_liner(self, params) -> str:
         event_type = params.get("eventType", "")
         return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Event Details ({event_type})"
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             url = self.url.format(projectId=self.toolset.config.get("project_id"))
 

diff --git a/holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py b/holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py
@@ -4,6 +4,7 @@
 
 from holmes.core.tools import (
     StructuredToolResult,
+    ToolInvokeContext,
     ToolParameter,
     StructuredToolResultStatus,
 )
@@ -217,9 +218,7 @@ def _build_connection_failures_report(
 
         return "\n".join(report_sections)
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             # Get configuration
             db_config = self.toolset.database_config()

diff --git a/holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py b/holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py
@@ -4,6 +4,7 @@
 
 from holmes.core.tools import (
     StructuredToolResult,
+    ToolInvokeContext,
     ToolParameter,
     StructuredToolResultStatus,
 )
@@ -155,9 +156,7 @@ def _build_connection_report(
 
         return "\n".join(report_sections)
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             hours_back = params.get("hours_back", 2)
 

diff --git a/holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py b/holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py
@@ -2,7 +2,11 @@
 from typing import Dict
 from datetime import datetime, timezone
 
-from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
+from holmes.core.tools import (
+    StructuredToolResult,
+    StructuredToolResultStatus,
+    ToolInvokeContext,
+)
 from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
     BaseAzureSQLTool,
     BaseAzureSQLToolset,
@@ -131,9 +135,7 @@ def _build_health_report(
 
         return "\n".join(report_sections)
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
             db_config = self.toolset.database_config()
             client = self.toolset.api_client()