Skip to content
Merged
Show file tree
Hide file tree
Changes from 50 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
4eaaf1a
wip
nherment Aug 18, 2025
0a21b16
wip
nherment Aug 19, 2025
123a30b
wip
nherment Aug 19, 2025
57db588
wip
nherment Aug 19, 2025
c814cfc
wip
nherment Aug 20, 2025
8258424
wip
nherment Aug 20, 2025
eb1e58b
wip
nherment Aug 20, 2025
d2d6b8b
report failures to sentry
nherment Aug 20, 2025
bf9c57b
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 20, 2025
e9bbab9
wip
nherment Aug 21, 2025
ca7b5ef
passing bash commands unit tests
nherment Aug 21, 2025
26d1100
chore: linting
nherment Aug 21, 2025
8961bc2
chore: revert changes to dockerfile
nherment Aug 21, 2025
4eb25f8
feat: add back llm instruction for bash kubectl run image
nherment Aug 21, 2025
5d473a6
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 21, 2025
248f243
fix: simplify bash grep tool
nherment Aug 21, 2025
c08633c
feat: add back llm instruction for bash kubectl run image
nherment Aug 22, 2025
225bc44
feat: improvements
nherment Aug 22, 2025
1468d63
feat: improvements
nherment Aug 22, 2025
1bda2a0
chore: address PR comments
nherment Aug 22, 2025
ea15fe7
chore: address PR comments
nherment Aug 22, 2025
10dbcec
chore: address PR comments
nherment Aug 22, 2025
e6e9c39
chore: address PR comments
nherment Aug 22, 2025
53843c5
chore: address PR comments
nherment Aug 22, 2025
d0dc549
chore: address PR comments
nherment Aug 22, 2025
c5031eb
chore: address PR comments
nherment Aug 22, 2025
0a7eeab
chore: address PR comments
nherment Aug 22, 2025
c01c085
chore: address PR comments
nherment Aug 22, 2025
40c1ae0
chore: address PR comments
nherment Aug 22, 2025
f5b21e9
chore: address PR comments
nherment Aug 22, 2025
4432f83
chore: address PR comments
nherment Aug 22, 2025
a4084f3
chore: address PR comments
nherment Aug 22, 2025
6ae6ef4
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 22, 2025
eb4fd59
chore: address PR comments
nherment Aug 22, 2025
c01d0cb
chore: restore default bash tool state
nherment Aug 22, 2025
b1848e5
feat: stricter prompt to avoid the llm limiting `kubectl logs` commands
nherment Aug 22, 2025
1dc3d9f
chore: linting
nherment Aug 22, 2025
65bdc03
feat: add CLI approval workflow for potentially unsafe tool calls
nherment Aug 25, 2025
eb5d952
chore: linting
nherment Aug 25, 2025
2ab09bb
feat: add CLI approval workflow for potentially unsafe tool calls
nherment Aug 25, 2025
f70ea87
chore: address PR comments
nherment Aug 26, 2025
11231d3
chore: address PR comments
nherment Aug 26, 2025
237eaa9
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 26, 2025
c429542
Merge branch 'rob-1931_bash_toolset_replaces_cli_toolsets' into rob-1…
nherment Aug 26, 2025
2b06eca
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Aug 28, 2025
e610784
fix: All tools can implement user approval through invoke function
nherment Aug 29, 2025
9e8f78d
fix: issue with multi async tool calls requiring approval
nherment Aug 29, 2025
7414053
fix: issue with multi async tool calls requiring approval
nherment Aug 29, 2025
1b2acde
chore: improve comments
nherment Aug 29, 2025
04650fd
chore: code cleanup
nherment Aug 29, 2025
6985c72
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Aug 29, 2025
862634b
fix: merge issue, remove investigation_id
nherment Aug 29, 2025
fc7c650
chore: linting
nherment Aug 29, 2025
1a283f1
chore: address PR comments
nherment Sep 1, 2025
f68df90
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 1, 2025
1e09ceb
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 1, 2025
530a7cb
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 1, 2025
f3150ae
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 2, 2025
8cadae8
chore: linting
nherment Sep 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions holmes/common/env_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,6 @@ def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:

# When using the bash tool, setting BASH_TOOL_UNSAFE_ALLOW_ALL will skip any command validation and run any command requested by the LLM
BASH_TOOL_UNSAFE_ALLOW_ALL = load_bool("BASH_TOOL_UNSAFE_ALLOW_ALL", False)

# For CLI only: enable an interactive user-approval prompt for potentially
# sensitive commands that would otherwise be rejected outright. Defaults to on.
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
200 changes: 134 additions & 66 deletions holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import textwrap
import uuid
from typing import Dict, List, Optional, Type, Union
from typing import Dict, List, Optional, Type, Union, Callable

import sentry_sdk
from openai import BadRequestError
Expand Down Expand Up @@ -284,6 +284,9 @@ def __init__(
self.tracer = tracer
self.llm = llm
self.investigation_id = str(uuid.uuid4())
self.approval_callback: Optional[
Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
] = None

def prompt_call(
self,
Expand Down Expand Up @@ -469,7 +472,7 @@ def call( # type: ignore
logging.debug(f"Tool to call: {t}")
futures.append(
executor.submit(
self._invoke_tool,
self._invoke_llm_tool_call,
tool_to_call=t,
previous_tool_calls=tool_calls,
trace_span=trace_span,
Expand All @@ -480,6 +483,8 @@ def call( # type: ignore
for future in concurrent.futures.as_completed(futures):
tool_call_result: ToolCallResult = future.result()

tool_call_result = self.handle_tool_call_approval(tool_call_result)

tool_calls.append(tool_call_result.as_tool_result_response())
messages.append(tool_call_result.as_tool_call_message())

Expand All @@ -494,7 +499,63 @@ def call( # type: ignore

raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")

def _invoke_tool(
def _directly_invoke_tool(
    self,
    tool_name: str,
    tool_params: dict,
    user_approved: bool,
    trace_span=None,
    tool_number: Optional[int] = None,
) -> StructuredToolResult:
    """Resolve a tool by name and invoke it, recording the call on a trace span.

    Args:
        tool_name: Name of the tool, resolved via the tool executor.
        tool_params: Parsed parameters to pass to the tool.
        user_approved: True when the user explicitly approved this call; lets
            tools execute actions they would otherwise gate behind approval.
        trace_span: Optional parent span; a fresh no-op DummySpan is used when
            None. (Avoids the mutable-default pitfall of `trace_span=DummySpan()`,
            which would share one instance across every call.)
        tool_number: Optional ordinal used for logging/display purposes.

    Returns:
        A StructuredToolResult. Exceptions raised by the tool are caught and
        returned as ERROR results rather than propagated.
    """
    if trace_span is None:
        trace_span = DummySpan()
    tool_span = trace_span.start_span(name=tool_name, type="tool")
    tool = self.tool_executor.get_tool_by_name(tool_name)
    try:
        if (not tool) or (tool_params is None):
            logging.warning(
                f"Skipping tool execution for {tool_name}: args: {tool_params}"
            )
            tool_response = StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Failed to find tool {tool_name}",
                params=tool_params,
            )
        else:
            tool_response = tool.invoke(
                tool_params, tool_number=tool_number, user_approved=user_approved
            )
    except Exception as e:
        logging.error(
            f"Tool call to {tool_name} failed with an Exception", exc_info=True
        )
        tool_response = StructuredToolResult(
            status=ToolResultStatus.ERROR,
            error=f"Tool call failed: {e}",
            params=tool_params,
        )

    try:
        # Log the call exactly once, for both the success and failure paths.
        # (Previously the exception path logged the span twice: once with the
        # raw exception and again with the synthesized error result.)
        tool_span.log(
            input=tool_params,
            output=tool_response.data,
            metadata={
                "status": tool_response.status.value,
                "error": tool_response.error,
                "description": tool.get_parameterized_one_liner(tool_params)
                if tool
                else "",
                "structured_tool_result": tool_response,
            },
        )
    finally:
        # Always close the span, even if logging itself raises.
        tool_span.end()

    return tool_response

def _invoke_llm_tool_call(
self,
tool_to_call: ChatCompletionMessageToolCall,
previous_tool_calls: list[dict],
Expand Down Expand Up @@ -523,92 +584,97 @@ def _invoke_tool(
),
)

tool_params = None
tool_params = {}
try:
tool_params = json.loads(tool_arguments)
except Exception:
logging.warning(
f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
)
tool_call_id = tool_to_call.id
tool = self.tool_executor.get_tool_by_name(tool_name)

if (not tool) or (tool_params is None):
logging.warning(
f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
)
return ToolCallResult(
tool_call_id=tool_call_id,
tool_name=tool_name,
description="NA",
result=StructuredToolResult(
status=ToolResultStatus.ERROR,
error=f"Failed to find tool {tool_name}",
params=tool_params,
),
)

tool_response = None
tool_call_id = tool_to_call.id

# Create tool span if tracing is enabled
tool_span = trace_span.start_span(name=tool_name, type="tool")
tool_response = prevent_overly_repeated_tool_call(
tool_name=tool_name,
tool_params=tool_params,
tool_calls=previous_tool_calls,
)

try:
tool_response = prevent_overly_repeated_tool_call(
tool_name=tool.name,
if not tool_response:
tool_response = self._directly_invoke_tool(
tool_name=tool_name,
tool_params=tool_params,
tool_calls=previous_tool_calls,
user_approved=False,
trace_span=trace_span,
tool_number=tool_number,
)
if not tool_response:
tool_response = tool.invoke(tool_params, tool_number=tool_number)

if not isinstance(tool_response, StructuredToolResult):
# Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
logging.error(
f"Tool {tool.name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
)
tool_response = StructuredToolResult(
status=ToolResultStatus.SUCCESS,
data=tool_response,
params=tool_params,
)

# Log tool execution to trace span
tool_span.log(
input=tool_params,
output=tool_response.data,
metadata={
"status": tool_response.status.value,
"error": tool_response.error,
"description": tool.get_parameterized_one_liner(tool_params),
"structured_tool_result": tool_response,
},
)

except Exception as e:
if not isinstance(tool_response, StructuredToolResult):
# Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
logging.error(
f"Tool call to {tool_name} failed with an Exception", exc_info=True
f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
)
tool_response = StructuredToolResult(
status=ToolResultStatus.ERROR,
error=f"Tool call failed: {e}",
status=ToolResultStatus.SUCCESS,
data=tool_response,
params=tool_params,
)

# Log error to trace span
tool_span.log(
input=tool_params, output=str(e), metadata={"status": "ERROR"}
)
finally:
# End tool span
tool_span.end()
tool = self.tool_executor.get_tool_by_name(tool_name)
return ToolCallResult(
tool_call_id=tool_call_id,
tool_name=tool_name,
description=tool.get_parameterized_one_liner(tool_params),
description=tool.get_parameterized_one_liner(tool_params) if tool else "",
result=tool_response,
)

def handle_tool_call_approval(
    self, tool_call_result: ToolCallResult
) -> ToolCallResult:
    """
    Handle approval for a single tool call if required.

    A result whose status is not APPROVAL_REQUIRED is returned unchanged.
    Otherwise the configured approval callback (if any) is consulted: on
    approval the tool is re-invoked with user_approved=True; on denial — or
    when no callback is configured — the result is converted to an ERROR
    carrying an explanatory message so the LLM knows why the command was
    not executed.

    Args:
        tool_call_result: A single tool call result that may require approval

    Returns:
        Updated tool call result with approved/denied status
    """

    if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
        return tool_call_result

    # If no approval callback, convert to ERROR because it is assumed the
    # client may not be able to handle approvals. Set an explicit error
    # message: without it the LLM would receive an ERROR with no explanation.
    if not self.approval_callback:
        tool_call_result.result.status = ToolResultStatus.ERROR
        tool_call_result.result.error = (
            "Command requires user approval but no approval mechanism "
            "is available. The command was not executed."
        )
        return tool_call_result

    # Get approval from user
    approved, feedback = self.approval_callback(tool_call_result.result)

    if approved:
        logging.debug(
            f"User approved command: {tool_call_result.result.invocation}"
        )

        # Re-run the tool with user_approved=True so it can execute the
        # previously gated action.
        new_response = self._directly_invoke_tool(
            tool_name=tool_call_result.tool_name,
            tool_params=tool_call_result.result.params or {},
            user_approved=True,
            trace_span=DummySpan(),
            tool_number=None,  # Could be extracted if needed
        )
        tool_call_result.result = new_response
    else:
        # User denied - update to error
        feedback_text = f" User feedback: {feedback}" if feedback else ""
        tool_call_result.result.status = ToolResultStatus.ERROR
        tool_call_result.result.error = (
            f"User denied command execution.{feedback_text}"
        )

    return tool_call_result

@staticmethod
def __load_post_processing_user_prompt(
input_prompt, investigation, user_prompt: Optional[str] = None
Expand Down Expand Up @@ -789,7 +855,7 @@ def call_stream(
for tool_index, t in enumerate(tools_to_call, 1): # type: ignore
futures.append(
executor.submit(
self._invoke_tool,
self._invoke_llm_tool_call,
tool_to_call=t, # type: ignore
previous_tool_calls=tool_calls,
trace_span=DummySpan(), # Streaming mode doesn't support tracing yet
Expand All @@ -804,6 +870,8 @@ def call_stream(
for future in concurrent.futures.as_completed(futures):
tool_call_result: ToolCallResult = future.result()

tool_call_result = self.handle_tool_call_approval(tool_call_result)

tool_calls.append(tool_call_result.as_tool_result_response())
messages.append(tool_call_result.as_tool_call_message())

Expand Down
24 changes: 20 additions & 4 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@ class ToolResultStatus(str, Enum):
SUCCESS = "success"
ERROR = "error"
NO_DATA = "no_data"
APPROVAL_REQUIRED = "approval_required"

def to_color(self) -> str:
    """Return the display color name associated with this result status."""
    # Dispatch via a lookup table; any status without an entry (e.g. NO_DATA)
    # falls back to white.
    status_colors = {
        ToolResultStatus.SUCCESS: "green",
        ToolResultStatus.ERROR: "red",
        ToolResultStatus.APPROVAL_REQUIRED: "yellow",
    }
    return status_colors.get(self, "white")

Expand All @@ -38,6 +41,8 @@ def to_emoji(self) -> str:
return "✔"
elif self == ToolResultStatus.ERROR:
return "❌"
elif self == ToolResultStatus.APPROVAL_REQUIRED:
return "⚠️"
else:
return "⚪️"

Expand Down Expand Up @@ -148,14 +153,17 @@ def get_openai_format(self, target_model: str):
)

def invoke(
self, params: Dict, tool_number: Optional[int] = None
self,
params: Dict,
tool_number: Optional[int] = None,
user_approved: bool = False,
) -> StructuredToolResult:
tool_number_str = f"#{tool_number} " if tool_number else ""
logging.info(
f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
)
start_time = time.time()
result = self._invoke(params)
result = self._invoke(params=params, user_approved=user_approved)
result.icon_url = self.icon_url
elapsed = time.time() - start_time
output_str = (
Expand All @@ -171,7 +179,13 @@ def invoke(
return result

@abstractmethod
def _invoke(self, params: Dict) -> StructuredToolResult:
def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
"""
params: the tool params
user_approved: whether the tool call is approved by the user. Can be used to confidently execute unsafe actions.
"""
pass

@abstractmethod
Expand Down Expand Up @@ -223,7 +237,9 @@ def _get_status(self, return_code: int, raw_output: str) -> ToolResultStatus:
return ToolResultStatus.NO_DATA
return ToolResultStatus.SUCCESS

def _invoke(self, params) -> StructuredToolResult:
def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
if self.command is not None:
raw_output, return_code, invocation = self.__invoke_command(params)
else:
Expand Down
Loading
Loading