Skip to content
Merged
Show file tree
Hide file tree
Changes from 50 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
4eaaf1a
wip
nherment Aug 18, 2025
0a21b16
wip
nherment Aug 19, 2025
123a30b
wip
nherment Aug 19, 2025
57db588
wip
nherment Aug 19, 2025
c814cfc
wip
nherment Aug 20, 2025
8258424
wip
nherment Aug 20, 2025
eb1e58b
wip
nherment Aug 20, 2025
d2d6b8b
report failures to sentry
nherment Aug 20, 2025
bf9c57b
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 20, 2025
e9bbab9
wip
nherment Aug 21, 2025
ca7b5ef
passing bash commands unit tests
nherment Aug 21, 2025
26d1100
chore: linting
nherment Aug 21, 2025
8961bc2
chore: revert changes to dockerfile
nherment Aug 21, 2025
4eb25f8
feat: add back llm instruction for bash kubectl run image
nherment Aug 21, 2025
5d473a6
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 21, 2025
248f243
fix: simplify bash grep tool
nherment Aug 21, 2025
c08633c
feat: add back llm instruction for bash kubectl run image
nherment Aug 22, 2025
225bc44
feat: improvements
nherment Aug 22, 2025
1468d63
feat: improvements
nherment Aug 22, 2025
1bda2a0
chore: address PR comments
nherment Aug 22, 2025
ea15fe7
chore: address PR comments
nherment Aug 22, 2025
10dbcec
chore: address PR comments
nherment Aug 22, 2025
e6e9c39
chore: address PR comments
nherment Aug 22, 2025
53843c5
chore: address PR comments
nherment Aug 22, 2025
d0dc549
chore: address PR comments
nherment Aug 22, 2025
c5031eb
chore: address PR comments
nherment Aug 22, 2025
0a7eeab
chore: address PR comments
nherment Aug 22, 2025
c01c085
chore: address PR comments
nherment Aug 22, 2025
40c1ae0
chore: address PR comments
nherment Aug 22, 2025
f5b21e9
chore: address PR comments
nherment Aug 22, 2025
4432f83
chore: address PR comments
nherment Aug 22, 2025
a4084f3
chore: address PR comments
nherment Aug 22, 2025
6ae6ef4
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 22, 2025
eb4fd59
chore: address PR comments
nherment Aug 22, 2025
c01d0cb
chore: restore default bash tool state
nherment Aug 22, 2025
b1848e5
feat: stricter prompt to avoid the llm limiting `kubectl logs` commands
nherment Aug 22, 2025
1dc3d9f
chore: linting
nherment Aug 22, 2025
65bdc03
feat: add CLI approval workflow for potentially unsafe tool calls
nherment Aug 25, 2025
eb5d952
chore: linting
nherment Aug 25, 2025
2ab09bb
feat: add CLI approval workflow for potentially unsafe tool calls
nherment Aug 25, 2025
f70ea87
chore: address PR comments
nherment Aug 26, 2025
11231d3
chore: address PR comments
nherment Aug 26, 2025
237eaa9
Merge branch 'master' into rob-1931_bash_toolset_replaces_cli_toolsets
nherment Aug 26, 2025
c429542
Merge branch 'rob-1931_bash_toolset_replaces_cli_toolsets' into rob-1…
nherment Aug 26, 2025
2b06eca
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Aug 28, 2025
e610784
fix: All tools can implement user approval through invoke function
nherment Aug 29, 2025
9e8f78d
fix: issue with multi async tool calls requiring approval
nherment Aug 29, 2025
7414053
fix: issue with multi async tool calls requiring approval
nherment Aug 29, 2025
1b2acde
chore: improve comments
nherment Aug 29, 2025
04650fd
chore: code cleanup
nherment Aug 29, 2025
6985c72
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Aug 29, 2025
862634b
fix: merge issue, remove investigation_id
nherment Aug 29, 2025
fc7c650
chore: linting
nherment Aug 29, 2025
1a283f1
chore: address PR comments
nherment Sep 1, 2025
f68df90
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 1, 2025
1e09ceb
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 1, 2025
530a7cb
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 1, 2025
f3150ae
Merge branch 'master' into rob-1932_bash_tool_cli_approval
nherment Sep 2, 2025
8cadae8
chore: linting
nherment Sep 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions holmes/common/env_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,6 @@ def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:

# When using the bash tool, setting BASH_TOOL_UNSAFE_ALLOW_ALL will skip any command validation and run any command requested by the LLM
BASH_TOOL_UNSAFE_ALLOW_ALL = load_bool("BASH_TOOL_UNSAFE_ALLOW_ALL", False)

# For CLI only: enable an interactive user-approval prompt for potentially
# sensitive commands that would otherwise be rejected outright. Defaults to on.
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
200 changes: 134 additions & 66 deletions holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import textwrap
import uuid
from typing import Dict, List, Optional, Type, Union
from typing import Dict, List, Optional, Type, Union, Callable

import sentry_sdk
from openai import BadRequestError
Expand Down Expand Up @@ -284,6 +284,9 @@ def __init__(
self.tracer = tracer
self.llm = llm
self.investigation_id = str(uuid.uuid4())
self.approval_callback: Optional[
Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
] = None

def prompt_call(
self,
Expand Down Expand Up @@ -469,7 +472,7 @@ def call( # type: ignore
logging.debug(f"Tool to call: {t}")
futures.append(
executor.submit(
self._invoke_tool,
self._invoke_llm_tool_call,
tool_to_call=t,
previous_tool_calls=tool_calls,
trace_span=trace_span,
Expand All @@ -480,6 +483,8 @@ def call( # type: ignore
for future in concurrent.futures.as_completed(futures):
tool_call_result: ToolCallResult = future.result()

tool_call_result = self.handle_tool_call_approval(tool_call_result)

tool_calls.append(tool_call_result.as_tool_result_response())
messages.append(tool_call_result.as_tool_call_message())

Expand All @@ -494,7 +499,63 @@ def call( # type: ignore

raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")

def _invoke_tool(
def _directly_invoke_tool(
    self,
    tool_name: str,
    tool_params: dict,
    user_approved: bool,
    trace_span=None,
    tool_number: Optional[int] = None,
) -> StructuredToolResult:
    """Resolve a tool by name and invoke it, recording the call on a trace span.

    Args:
        tool_name: Name of the tool, resolved via the tool executor.
        tool_params: Parsed parameters to pass to the tool.
        user_approved: True when the user explicitly approved this call; lets
            tools execute actions they would otherwise gate behind approval.
        trace_span: Optional parent span; a fresh no-op DummySpan is used when
            None. (Avoids the mutable-default pitfall of `trace_span=DummySpan()`,
            which would share one instance across every call.)
        tool_number: Optional ordinal used for logging/display purposes.

    Returns:
        A StructuredToolResult. Exceptions raised by the tool are caught and
        returned as ERROR results rather than propagated.
    """
    if trace_span is None:
        trace_span = DummySpan()
    tool_span = trace_span.start_span(name=tool_name, type="tool")
    tool = self.tool_executor.get_tool_by_name(tool_name)
    try:
        if (not tool) or (tool_params is None):
            logging.warning(
                f"Skipping tool execution for {tool_name}: args: {tool_params}"
            )
            tool_response = StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Failed to find tool {tool_name}",
                params=tool_params,
            )
        else:
            tool_response = tool.invoke(
                tool_params, tool_number=tool_number, user_approved=user_approved
            )
    except Exception as e:
        logging.error(
            f"Tool call to {tool_name} failed with an Exception", exc_info=True
        )
        tool_response = StructuredToolResult(
            status=ToolResultStatus.ERROR,
            error=f"Tool call failed: {e}",
            params=tool_params,
        )

    try:
        # Log the call exactly once, for both the success and failure paths.
        # (Previously the exception path logged the span twice: once with the
        # raw exception and again with the synthesized error result.)
        tool_span.log(
            input=tool_params,
            output=tool_response.data,
            metadata={
                "status": tool_response.status.value,
                "error": tool_response.error,
                "description": tool.get_parameterized_one_liner(tool_params)
                if tool
                else "",
                "structured_tool_result": tool_response,
            },
        )
    finally:
        # Always close the span, even if logging itself raises.
        tool_span.end()

    return tool_response

def _invoke_llm_tool_call(
self,
tool_to_call: ChatCompletionMessageToolCall,
previous_tool_calls: list[dict],
Expand Down Expand Up @@ -523,92 +584,97 @@ def _invoke_tool(
),
)

tool_params = None
tool_params = {}
try:
tool_params = json.loads(tool_arguments)
except Exception:
logging.warning(
f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
)
tool_call_id = tool_to_call.id
tool = self.tool_executor.get_tool_by_name(tool_name)

if (not tool) or (tool_params is None):
logging.warning(
f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
)
return ToolCallResult(
tool_call_id=tool_call_id,
tool_name=tool_name,
description="NA",
result=StructuredToolResult(
status=ToolResultStatus.ERROR,
error=f"Failed to find tool {tool_name}",
params=tool_params,
),
)

tool_response = None
tool_call_id = tool_to_call.id

# Create tool span if tracing is enabled
tool_span = trace_span.start_span(name=tool_name, type="tool")
tool_response = prevent_overly_repeated_tool_call(
tool_name=tool_name,
tool_params=tool_params,
tool_calls=previous_tool_calls,
)

try:
tool_response = prevent_overly_repeated_tool_call(
tool_name=tool.name,
if not tool_response:
tool_response = self._directly_invoke_tool(
tool_name=tool_name,
tool_params=tool_params,
tool_calls=previous_tool_calls,
user_approved=False,
trace_span=trace_span,
tool_number=tool_number,
)
if not tool_response:
tool_response = tool.invoke(tool_params, tool_number=tool_number)

if not isinstance(tool_response, StructuredToolResult):
# Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
logging.error(
f"Tool {tool.name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
)
tool_response = StructuredToolResult(
status=ToolResultStatus.SUCCESS,
data=tool_response,
params=tool_params,
)

# Log tool execution to trace span
tool_span.log(
input=tool_params,
output=tool_response.data,
metadata={
"status": tool_response.status.value,
"error": tool_response.error,
"description": tool.get_parameterized_one_liner(tool_params),
"structured_tool_result": tool_response,
},
)

except Exception as e:
if not isinstance(tool_response, StructuredToolResult):
# Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
logging.error(
f"Tool call to {tool_name} failed with an Exception", exc_info=True
f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
)
tool_response = StructuredToolResult(
status=ToolResultStatus.ERROR,
error=f"Tool call failed: {e}",
status=ToolResultStatus.SUCCESS,
data=tool_response,
params=tool_params,
)

# Log error to trace span
tool_span.log(
input=tool_params, output=str(e), metadata={"status": "ERROR"}
)
finally:
# End tool span
tool_span.end()
tool = self.tool_executor.get_tool_by_name(tool_name)
return ToolCallResult(
tool_call_id=tool_call_id,
tool_name=tool_name,
description=tool.get_parameterized_one_liner(tool_params),
description=tool.get_parameterized_one_liner(tool_params) if tool else "",
result=tool_response,
)

def handle_tool_call_approval(
    self, tool_call_result: ToolCallResult
) -> ToolCallResult:
    """
    Handle approval for a single tool call if required.

    A result whose status is not APPROVAL_REQUIRED is returned unchanged.
    Otherwise the configured approval callback (if any) is consulted: on
    approval the tool is re-invoked with user_approved=True; on denial — or
    when no callback is configured — the result is converted to an ERROR
    carrying an explanatory message so the LLM knows why the command was
    not executed.

    Args:
        tool_call_result: A single tool call result that may require approval

    Returns:
        Updated tool call result with approved/denied status
    """

    if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
        return tool_call_result

    # If no approval callback, convert to ERROR because it is assumed the
    # client may not be able to handle approvals. Set an explicit error
    # message: without it the LLM would receive an ERROR with no explanation.
    if not self.approval_callback:
        tool_call_result.result.status = ToolResultStatus.ERROR
        tool_call_result.result.error = (
            "Command requires user approval but no approval mechanism "
            "is available. The command was not executed."
        )
        return tool_call_result

    # Get approval from user
    approved, feedback = self.approval_callback(tool_call_result.result)

    if approved:
        logging.debug(
            f"User approved command: {tool_call_result.result.invocation}"
        )

        # Re-run the tool with user_approved=True so it can execute the
        # previously gated action.
        new_response = self._directly_invoke_tool(
            tool_name=tool_call_result.tool_name,
            tool_params=tool_call_result.result.params or {},
            user_approved=True,
            trace_span=DummySpan(),
            tool_number=None,  # Could be extracted if needed
        )
        tool_call_result.result = new_response
    else:
        # User denied - update to error
        feedback_text = f" User feedback: {feedback}" if feedback else ""
        tool_call_result.result.status = ToolResultStatus.ERROR
        tool_call_result.result.error = (
            f"User denied command execution.{feedback_text}"
        )

    return tool_call_result

@staticmethod
def __load_post_processing_user_prompt(
input_prompt, investigation, user_prompt: Optional[str] = None
Expand Down Expand Up @@ -789,7 +855,7 @@ def call_stream(
for tool_index, t in enumerate(tools_to_call, 1): # type: ignore
futures.append(
executor.submit(
self._invoke_tool,
self._invoke_llm_tool_call,
tool_to_call=t, # type: ignore
previous_tool_calls=tool_calls,
trace_span=DummySpan(), # Streaming mode doesn't support tracing yet
Expand All @@ -804,6 +870,8 @@ def call_stream(
for future in concurrent.futures.as_completed(futures):
tool_call_result: ToolCallResult = future.result()

tool_call_result = self.handle_tool_call_approval(tool_call_result)

tool_calls.append(tool_call_result.as_tool_result_response())
messages.append(tool_call_result.as_tool_call_message())

Expand Down
24 changes: 20 additions & 4 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@ class ToolResultStatus(str, Enum):
SUCCESS = "success"
ERROR = "error"
NO_DATA = "no_data"
APPROVAL_REQUIRED = "approval_required"

def to_color(self) -> str:
    """Return the display color name associated with this result status."""
    # Dispatch via a lookup table; any status without an entry (e.g. NO_DATA)
    # falls back to white.
    status_colors = {
        ToolResultStatus.SUCCESS: "green",
        ToolResultStatus.ERROR: "red",
        ToolResultStatus.APPROVAL_REQUIRED: "yellow",
    }
    return status_colors.get(self, "white")

Expand All @@ -38,6 +41,8 @@ def to_emoji(self) -> str:
return "✔"
elif self == ToolResultStatus.ERROR:
return "❌"
elif self == ToolResultStatus.APPROVAL_REQUIRED:
return "⚠️"
else:
return "⚪️"

Expand Down Expand Up @@ -148,14 +153,17 @@ def get_openai_format(self, target_model: str):
)

def invoke(
self, params: Dict, tool_number: Optional[int] = None
self,
params: Dict,
tool_number: Optional[int] = None,
user_approved: bool = False,
) -> StructuredToolResult:
tool_number_str = f"#{tool_number} " if tool_number else ""
logging.info(
f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
)
start_time = time.time()
result = self._invoke(params)
result = self._invoke(params=params, user_approved=user_approved)
result.icon_url = self.icon_url
elapsed = time.time() - start_time
output_str = (
Expand All @@ -171,7 +179,13 @@ def invoke(
return result

@abstractmethod
def _invoke(self, params: Dict) -> StructuredToolResult:
def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
"""
params: the tool params
user_approved: whether the tool call is approved by the user. Can be used to confidently execute unsafe actions.
"""
pass

@abstractmethod
Expand Down Expand Up @@ -223,7 +237,9 @@ def _get_status(self, return_code: int, raw_output: str) -> ToolResultStatus:
return ToolResultStatus.NO_DATA
return ToolResultStatus.SUCCESS

def _invoke(self, params) -> StructuredToolResult:
def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
if self.command is not None:
raw_output, return_code, invocation = self.__invoke_command(params)
else:
Expand Down
Loading
Loading