Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions holmes/common/env_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,10 @@ def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)

MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 300))

# Limit each tool response to N% of the total context window.
# Number between 0 and 100
# Setting to either 0 or any number above 100 disables the logic that limits tool response size
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 0)
)
8 changes: 4 additions & 4 deletions holmes/core/safeguards.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
from holmes.core.tools import StructuredToolResult, ToolResultStatus
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams


Expand Down Expand Up @@ -39,7 +39,7 @@ def _has_previous_unfiltered_pod_logs_call(
result = tool_call.get("result", {})
if (
tool_call.get("tool_name") == POD_LOGGING_TOOL_NAME
and result.get("status") == ToolResultStatus.NO_DATA
and result.get("status") == StructuredToolResultStatus.NO_DATA
and result.get("params")
):
params = FetchPodLogsParams(**result.get("params"))
Expand Down Expand Up @@ -94,7 +94,7 @@ def prevent_overly_repeated_tool_call(
For example if Holmes checks if a resource is deployed, runs a command to deploy it and then checks again if it has deployed properly.
"""
return StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=(
"Refusing to run this tool call because it has already been called during this session with the exact same parameters.\n"
"Move on with your investigation to a different tool or change the parameter values."
Expand All @@ -106,7 +106,7 @@ def prevent_overly_repeated_tool_call(
tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
):
return StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=(
f"Refusing to run this tool call because the exact same {POD_LOGGING_TOOL_NAME} tool call without filter has already run and returned no data.\n"
"This tool call would also have returned no data.\n"
Expand Down
106 changes: 23 additions & 83 deletions holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,22 @@
from holmes.core.resource_instruction import ResourceInstructions
from holmes.core.runbooks import RunbookManager
from holmes.core.safeguards import prevent_overly_repeated_tool_call
from holmes.core.tools import StructuredToolResult, ToolResultStatus
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools_utils.tool_context_window_limiter import (
prevent_overly_big_tool_response,
)
from holmes.plugins.prompts import load_and_render_prompt
from holmes.utils.global_instructions import (
Instructions,
add_global_instructions_to_user_prompt,
)
from holmes.utils.tags import format_tags_in_string, parse_messages_tags
from holmes.core.tools_utils.tool_executor import ToolExecutor
from holmes.core.tools_utils.data_types import (
TruncationResult,
ToolCallResult,
TruncationMetadata,
)
from holmes.core.tracing import DummySpan
from holmes.utils.colors import AI_COLOR
from holmes.utils.stream import StreamEvents, StreamMessage
Expand Down Expand Up @@ -119,34 +127,6 @@ def _process_cost_info(
logging.debug(f"Could not extract cost information: {e}")


class TruncationMetadata(BaseModel):
tool_call_id: str
start_index: int
end_index: int


class TruncationResult(BaseModel):
truncated_messages: List[dict]
truncations: List[TruncationMetadata]


def format_tool_result_data(tool_result: StructuredToolResult) -> str:
tool_response = tool_result.data
if isinstance(tool_result.data, str):
tool_response = tool_result.data
else:
try:
if isinstance(tool_result.data, BaseModel):
tool_response = tool_result.data.model_dump_json(indent=2)
else:
tool_response = json.dumps(tool_result.data, indent=2)
except Exception:
tool_response = str(tool_result.data)
if tool_result.status == ToolResultStatus.ERROR:
tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
return tool_response


# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
# We should fix this in the future
Expand Down Expand Up @@ -249,52 +229,6 @@ def truncate_messages_to_fit_context(
return TruncationResult(truncated_messages=messages, truncations=truncations)


class ToolCallResult(BaseModel):
tool_call_id: str
tool_name: str
description: str
result: StructuredToolResult
size: Optional[int] = None

def as_tool_call_message(self):
content = format_tool_result_data(self.result)
if self.result.params:
content = (
f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
+ content
)
return {
"tool_call_id": self.tool_call_id,
"role": "tool",
"name": self.tool_name,
"content": content,
}

def as_tool_result_response(self):
result_dump = self.result.model_dump()
result_dump["data"] = self.result.get_stringified_data()

return {
"tool_call_id": self.tool_call_id,
"tool_name": self.tool_name,
"description": self.description,
"role": "tool",
"result": result_dump,
}

def as_streaming_tool_result_response(self):
result_dump = self.result.model_dump()
result_dump["data"] = self.result.get_stringified_data()

return {
"tool_call_id": self.tool_call_id,
"role": "tool",
"description": self.description,
"name": self.tool_name,
"result": result_dump,
}


class LLMResult(LLMCosts):
tool_calls: Optional[List[ToolCallResult]] = None
result: Optional[str] = None
Expand Down Expand Up @@ -537,9 +471,13 @@ def call( # type: ignore
else None
)

prevent_overly_big_tool_response(
tool_call_result=tool_call_result, llm=self.llm
)

if (
tool_call_result.result.status
== ToolResultStatus.APPROVAL_REQUIRED
== StructuredToolResultStatus.APPROVAL_REQUIRED
):
with trace_span.start_span(type="tool") as tool_span:
tool_call_result = self._handle_tool_call_approval(
Expand Down Expand Up @@ -577,7 +515,7 @@ def _directly_invoke_tool_call(
f"Skipping tool execution for {tool_name}: args: {tool_params}"
)
return StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=f"Failed to find tool {tool_name}",
params=tool_params,
)
Expand All @@ -591,7 +529,7 @@ def _directly_invoke_tool_call(
f"Tool call to {tool_name} failed with an Exception", exc_info=True
)
tool_response = StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=f"Tool call failed: {e}",
params=tool_params,
)
Expand Down Expand Up @@ -633,7 +571,7 @@ def _get_tool_call_result(
f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
)
tool_response = StructuredToolResult(
status=ToolResultStatus.SUCCESS,
status=StructuredToolResultStatus.SUCCESS,
data=tool_response,
params=tool_params,
)
Expand Down Expand Up @@ -683,7 +621,7 @@ def _invoke_llm_tool_call(
tool_name=tool_name,
description="NA",
result=StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error="Custom tool calls are not supported",
params=None,
),
Expand Down Expand Up @@ -720,7 +658,7 @@ def _handle_tool_call_approval(

# If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
if not self.approval_callback:
tool_call_result.result.status = ToolResultStatus.ERROR
tool_call_result.result.status = StructuredToolResultStatus.ERROR
return tool_call_result

# Get approval from user
Expand All @@ -740,7 +678,7 @@ def _handle_tool_call_approval(
else:
# User denied - update to error
feedback_text = f" User feedback: {feedback}" if feedback else ""
tool_call_result.result.status = ToolResultStatus.ERROR
tool_call_result.result.status = StructuredToolResultStatus.ERROR
tool_call_result.result.error = (
f"User denied command execution.{feedback_text}"
)
Expand Down Expand Up @@ -952,7 +890,9 @@ def call_stream(

for future in concurrent.futures.as_completed(futures):
tool_call_result: ToolCallResult = future.result()

prevent_overly_big_tool_response(
tool_call_result=tool_call_result, llm=self.llm
)
tool_calls.append(tool_call_result.as_tool_result_response())
messages.append(tool_call_result.as_tool_call_message())

Expand Down
31 changes: 18 additions & 13 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,36 +48,36 @@
logger = logging.getLogger(__name__)


class ToolResultStatus(str, Enum):
class StructuredToolResultStatus(str, Enum):
SUCCESS = "success"
ERROR = "error"
NO_DATA = "no_data"
APPROVAL_REQUIRED = "approval_required"

def to_color(self) -> str:
if self == ToolResultStatus.SUCCESS:
if self == StructuredToolResultStatus.SUCCESS:
return "green"
elif self == ToolResultStatus.ERROR:
elif self == StructuredToolResultStatus.ERROR:
return "red"
elif self == ToolResultStatus.APPROVAL_REQUIRED:
elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
return "yellow"
else:
return "white"

def to_emoji(self) -> str:
if self == ToolResultStatus.SUCCESS:
if self == StructuredToolResultStatus.SUCCESS:
return "✔"
elif self == ToolResultStatus.ERROR:
elif self == StructuredToolResultStatus.ERROR:
return "❌"
elif self == ToolResultStatus.APPROVAL_REQUIRED:
elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
return "⚠️"
else:
return "⚪️"


class StructuredToolResult(BaseModel):
schema_version: str = "robusta:v1.0.0"
status: ToolResultStatus
status: StructuredToolResultStatus
error: Optional[str] = None
return_code: Optional[int] = None
data: Optional[Any] = None
Expand Down Expand Up @@ -261,7 +261,10 @@ def _apply_transformers(self, result: StructuredToolResult) -> StructuredToolRes
Returns:
The tool result with transformed data, or original result if transformation fails
"""
if not self._transformer_instances or result.status != ToolResultStatus.SUCCESS:
if (
not self._transformer_instances
or result.status != StructuredToolResultStatus.SUCCESS
):
return result

# Get the output string to transform
Expand Down Expand Up @@ -387,12 +390,14 @@ def _build_context(self, params):
context = {**params}
return context

def _get_status(self, return_code: int, raw_output: str) -> ToolResultStatus:
def _get_status(
self, return_code: int, raw_output: str
) -> StructuredToolResultStatus:
if return_code != 0:
return ToolResultStatus.ERROR
return StructuredToolResultStatus.ERROR
if raw_output == "":
return ToolResultStatus.NO_DATA
return ToolResultStatus.SUCCESS
return StructuredToolResultStatus.NO_DATA
return StructuredToolResultStatus.SUCCESS

def _invoke(
self, params: dict, user_approved: bool = False
Expand Down
79 changes: 79 additions & 0 deletions holmes/core/tools_utils/data_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import json
from typing import Optional
from pydantic import BaseModel

from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


class TruncationMetadata(BaseModel):
    """Records that one tool-call message was truncated to fit the context window.

    Produced alongside the truncated message list so callers can inspect or
    report which tool responses were cut and where.
    """

    # ID of the tool call whose message content was truncated
    tool_call_id: str
    # Character span of the truncation within the message content.
    # NOTE(review): assumed [start_index, end_index) over the content string —
    # confirm against the producer (truncate_messages_to_fit_context).
    start_index: int
    end_index: int


class TruncationResult(BaseModel):
    """Outcome of fitting a message list into the model's context window:
    the (possibly shortened) messages plus metadata describing what was cut."""

    # Messages after truncation, in their original order
    truncated_messages: list[dict]
    # Metadata for each message that was actually truncated (empty if none were)
    truncations: list[TruncationMetadata]


def format_tool_result_data(tool_result: StructuredToolResult) -> str:
    """Render a tool result's payload as a string suitable for an LLM message.

    Serialization rules:
      * ``str`` data is passed through unchanged.
      * Pydantic models are dumped as indented JSON via ``model_dump_json``.
      * Anything else is attempted with ``json.dumps``; on failure we fall
        back to ``str()`` so formatting never raises.
      * On ERROR status the error text replaces the formatted payload,
        with the raw data appended for context.

    Args:
        tool_result: The structured result returned by a tool invocation.

    Returns:
        A string representation of the tool's output (or error).
    """
    if isinstance(tool_result.data, str):
        tool_response = tool_result.data
    else:
        try:
            if isinstance(tool_result.data, BaseModel):
                tool_response = tool_result.data.model_dump_json(indent=2)
            else:
                tool_response = json.dumps(tool_result.data, indent=2)
        except Exception:
            # Data is not JSON-serializable; degrade gracefully.
            tool_response = str(tool_result.data)
    if tool_result.status == StructuredToolResultStatus.ERROR:
        # NOTE(review): the error path deliberately uses the raw data
        # (str()-formatted), not the JSON-indented form computed above.
        tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
    return tool_response


class ToolCallResult(BaseModel):
    """The outcome of a single tool invocation performed on behalf of the LLM,
    with helpers to serialize it for the chat history and for API responses."""

    # ID correlating this result with the LLM's tool-call request
    tool_call_id: str
    tool_name: str
    description: str
    # Structured payload (status, data, error, params) produced by the tool
    result: StructuredToolResult
    # Size of the serialized result, when computed by the caller
    size: Optional[int] = None

    def _serialized_result(self) -> dict:
        """Dump ``result`` with its ``data`` field stringified for transport.

        Shared by both response serializers to avoid duplicating the dump logic.
        """
        result_dump = self.result.model_dump()
        result_dump["data"] = self.result.get_stringified_data()
        return result_dump

    def as_tool_call_message(self) -> dict:
        """Build the ``role: tool`` message appended to the LLM conversation,
        prefixing the output with the params used (when present) so the model
        can correlate the response with its own call."""
        content = format_tool_result_data(self.result)
        if self.result.params:
            content = (
                f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
                + content
            )
        return {
            "tool_call_id": self.tool_call_id,
            "role": "tool",
            "name": self.tool_name,
            "content": content,
        }

    def as_tool_result_response(self) -> dict:
        """Serialize for a non-streaming API response (uses the ``tool_name`` key)."""
        return {
            "tool_call_id": self.tool_call_id,
            "tool_name": self.tool_name,
            "description": self.description,
            "role": "tool",
            "result": self._serialized_result(),
        }

    def as_streaming_tool_result_response(self) -> dict:
        """Serialize for a streaming API response.

        NOTE(review): intentionally uses ``name`` where the non-streaming form
        uses ``tool_name`` — confirm consumers rely on that key difference.
        """
        return {
            "tool_call_id": self.tool_call_id,
            "role": "tool",
            "description": self.description,
            "name": self.tool_name,
            "result": self._serialized_result(),
        }
Loading
Loading