7 changes: 7 additions & 0 deletions holmes/common/env_vars.py
@@ -74,3 +74,10 @@ def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)

MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 300))

# Limit each tool response to N% of the total context window.
# N must be a number between 0 and 100.
# Setting N to 0 or to any value above 100 disables the logic that limits tool response size.
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 10)
)
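
The new `TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT` setting caps how much of the model's context window a single tool response may occupy. The limiter itself lives in `holmes/core/tools_utils/tool_context_window_limiter.py`, which is not part of this diff; the sketch below only illustrates how the percentage could translate into a per-response token budget, and is not the actual Holmes implementation.

```python
# Illustrative helper showing how the percentage could map to a token budget.
# The real limiter (prevent_overly_big_tool_response) is not shown in this PR.
from typing import Optional


def tool_response_token_budget(context_window_tokens: int, max_pct: float) -> Optional[int]:
    """Return the per-tool-response token budget, or None if limiting is disabled."""
    if max_pct <= 0 or max_pct > 100:
        # 0 or any value above 100 disables the limit, per the comment above.
        return None
    return int(context_window_tokens * (max_pct / 100.0))


# Example: a 128k-token model with the default 10% cap allows 12,800 tokens per tool response.
assert tool_response_token_budget(128_000, 10) == 12_800
```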
8 changes: 4 additions & 4 deletions holmes/core/safeguards.py
@@ -5,7 +5,7 @@

from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
from holmes.core.tools import StructuredToolResult, ToolResultStatus
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams


@@ -39,7 +39,7 @@ def _has_previous_unfiltered_pod_logs_call(
result = tool_call.get("result", {})
if (
tool_call.get("tool_name") == POD_LOGGING_TOOL_NAME
and result.get("status") == ToolResultStatus.NO_DATA
and result.get("status") == StructuredToolResultStatus.NO_DATA
and result.get("params")
):
params = FetchPodLogsParams(**result.get("params"))
@@ -94,7 +94,7 @@ def prevent_overly_repeated_tool_call(
For example, if Holmes checks whether a resource is deployed, runs a command to deploy it, and then checks again whether it deployed properly.
"""
return StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=(
"Refusing to run this tool call because it has already been called during this session with the exact same parameters.\n"
"Move on with your investigation to a different tool or change the parameter values."
@@ -106,7 +106,7 @@
tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
):
return StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=(
f"Refusing to run this tool call because the exact same {POD_LOGGING_TOOL_NAME} tool call without filter has already run and returned no data.\n"
"This tool call would also have returned no data.\n"
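
Only the status enum name changes in this file, but the safeguard it belongs to, refusing to re-run a tool call that already ran with identical parameters, is easier to follow with a concrete check. A minimal sketch, assuming each previous call is recorded as a dict carrying `tool_name` and a `result` with `params` (the shape suggested by the code above), could look like this:

```python
# Illustrative sketch, not the actual Holmes safeguard implementation.
def is_repeated_tool_call(tool_name: str, tool_params: dict, tool_calls: list[dict]) -> bool:
    """Return True if the same tool already ran in this session with identical parameters."""
    for previous in tool_calls:
        previous_result = previous.get("result", {})
        if (
            previous.get("tool_name") == tool_name
            and previous_result.get("params") == tool_params
        ):
            return True
    return False
```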
104 changes: 21 additions & 83 deletions holmes/core/tool_calling_llm.py
@@ -32,14 +32,22 @@
from holmes.core.resource_instruction import ResourceInstructions
from holmes.core.runbooks import RunbookManager
from holmes.core.safeguards import prevent_overly_repeated_tool_call
from holmes.core.tools import StructuredToolResult, ToolResultStatus
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools_utils.tool_context_window_limiter import (
prevent_overly_big_tool_response,
)
from holmes.plugins.prompts import load_and_render_prompt
from holmes.utils.global_instructions import (
Instructions,
add_global_instructions_to_user_prompt,
)
from holmes.utils.tags import format_tags_in_string, parse_messages_tags
from holmes.core.tools_utils.tool_executor import ToolExecutor
from holmes.core.tools_utils.data_types import (
TruncationResult,
ToolCallResult,
TruncationMetadata,
)
from holmes.core.tracing import DummySpan
from holmes.utils.colors import AI_COLOR
from holmes.utils.stream import StreamEvents, StreamMessage
@@ -119,34 +127,6 @@ def _process_cost_info(
logging.debug(f"Could not extract cost information: {e}")


class TruncationMetadata(BaseModel):
tool_call_id: str
start_index: int
end_index: int


class TruncationResult(BaseModel):
truncated_messages: List[dict]
truncations: List[TruncationMetadata]


def format_tool_result_data(tool_result: StructuredToolResult) -> str:
tool_response = tool_result.data
if isinstance(tool_result.data, str):
tool_response = tool_result.data
else:
try:
if isinstance(tool_result.data, BaseModel):
tool_response = tool_result.data.model_dump_json(indent=2)
else:
tool_response = json.dumps(tool_result.data, indent=2)
except Exception:
tool_response = str(tool_result.data)
if tool_result.status == ToolResultStatus.ERROR:
tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
return tool_response


# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
# We should fix this in the future
@@ -249,52 +229,6 @@ def truncate_messages_to_fit_context(
return TruncationResult(truncated_messages=messages, truncations=truncations)


class ToolCallResult(BaseModel):
tool_call_id: str
tool_name: str
description: str
result: StructuredToolResult
size: Optional[int] = None

def as_tool_call_message(self):
content = format_tool_result_data(self.result)
if self.result.params:
content = (
f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
+ content
)
return {
"tool_call_id": self.tool_call_id,
"role": "tool",
"name": self.tool_name,
"content": content,
}

def as_tool_result_response(self):
result_dump = self.result.model_dump()
result_dump["data"] = self.result.get_stringified_data()

return {
"tool_call_id": self.tool_call_id,
"tool_name": self.tool_name,
"description": self.description,
"role": "tool",
"result": result_dump,
}

def as_streaming_tool_result_response(self):
result_dump = self.result.model_dump()
result_dump["data"] = self.result.get_stringified_data()

return {
"tool_call_id": self.tool_call_id,
"role": "tool",
"description": self.description,
"name": self.tool_name,
"result": result_dump,
}


class LLMResult(LLMCosts):
tool_calls: Optional[List[ToolCallResult]] = None
result: Optional[str] = None
@@ -539,7 +473,7 @@ def call( # type: ignore

if (
tool_call_result.result.status
== ToolResultStatus.APPROVAL_REQUIRED
== StructuredToolResultStatus.APPROVAL_REQUIRED
):
with trace_span.start_span(type="tool") as tool_span:
tool_call_result = self._handle_tool_call_approval(
@@ -577,7 +511,7 @@ def _directly_invoke_tool_call(
f"Skipping tool execution for {tool_name}: args: {tool_params}"
)
return StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=f"Failed to find tool {tool_name}",
params=tool_params,
)
@@ -591,7 +525,7 @@
f"Tool call to {tool_name} failed with an Exception", exc_info=True
)
tool_response = StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error=f"Tool call failed: {e}",
params=tool_params,
)
@@ -633,7 +567,7 @@ def _get_tool_call_result(
f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
)
tool_response = StructuredToolResult(
status=ToolResultStatus.SUCCESS,
status=StructuredToolResultStatus.SUCCESS,
data=tool_response,
params=tool_params,
)
@@ -683,7 +617,7 @@ def _invoke_llm_tool_call(
tool_name=tool_name,
description="NA",
result=StructuredToolResult(
status=ToolResultStatus.ERROR,
status=StructuredToolResultStatus.ERROR,
error="Custom tool calls are not supported",
params=None,
),
@@ -699,6 +633,11 @@
previous_tool_calls=previous_tool_calls,
tool_number=tool_number,
)

prevent_overly_big_tool_response(
tool_call_result=tool_call_result, llm=self.llm
)

ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
return tool_call_result
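
`prevent_overly_big_tool_response` now runs on every completed `ToolCallResult` before it is logged and returned. Its implementation is not included in this diff; the sketch below shows one way such a check could work. The `llm.get_context_window_size()` call and the character-per-token heuristic are assumptions made for illustration, not confirmed Holmes APIs.

```python
# Hypothetical sketch; the real function lives in
# holmes/core/tools_utils/tool_context_window_limiter.py and is not shown here.
from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


def prevent_overly_big_tool_response_sketch(tool_call_result, llm) -> None:
    """Replace an oversized tool result with an error, mutating it in place."""
    pct = TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
    if pct <= 0 or pct > 100:  # documented escape hatch: 0 or >100 disables the limit
        return
    # Assumed interface: the LLM wrapper reports its context window size in tokens.
    max_chars = int(llm.get_context_window_size() * pct / 100) * 4  # ~4 chars/token, assumption
    data = tool_call_result.result.get_stringified_data() or ""
    if len(data) > max_chars:
        tool_call_result.result = StructuredToolResult(
            status=StructuredToolResultStatus.ERROR,
            error=(
                f"Tool output exceeded {pct}% of the context window; "
                "re-run the tool with narrower parameters."
            ),
            params=tool_call_result.result.params,
        )
```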

Expand All @@ -720,7 +659,7 @@ def _handle_tool_call_approval(

# If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
if not self.approval_callback:
tool_call_result.result.status = ToolResultStatus.ERROR
tool_call_result.result.status = StructuredToolResultStatus.ERROR
return tool_call_result

# Get approval from user
@@ -740,7 +679,7 @@
else:
# User denied - update to error
feedback_text = f" User feedback: {feedback}" if feedback else ""
tool_call_result.result.status = ToolResultStatus.ERROR
tool_call_result.result.status = StructuredToolResultStatus.ERROR
tool_call_result.result.error = (
f"User denied command execution.{feedback_text}"
)
@@ -952,7 +891,6 @@ def call_stream(

for future in concurrent.futures.as_completed(futures):
tool_call_result: ToolCallResult = future.result()

tool_calls.append(tool_call_result.as_tool_result_response())
messages.append(tool_call_result.as_tool_call_message())

31 changes: 18 additions & 13 deletions holmes/core/tools.py
@@ -48,36 +48,36 @@
logger = logging.getLogger(__name__)


class ToolResultStatus(str, Enum):
class StructuredToolResultStatus(str, Enum):
SUCCESS = "success"
ERROR = "error"
NO_DATA = "no_data"
APPROVAL_REQUIRED = "approval_required"

def to_color(self) -> str:
if self == ToolResultStatus.SUCCESS:
if self == StructuredToolResultStatus.SUCCESS:
return "green"
elif self == ToolResultStatus.ERROR:
elif self == StructuredToolResultStatus.ERROR:
return "red"
elif self == ToolResultStatus.APPROVAL_REQUIRED:
elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
return "yellow"
else:
return "white"

def to_emoji(self) -> str:
if self == ToolResultStatus.SUCCESS:
if self == StructuredToolResultStatus.SUCCESS:
return "✔"
elif self == ToolResultStatus.ERROR:
elif self == StructuredToolResultStatus.ERROR:
return "❌"
elif self == ToolResultStatus.APPROVAL_REQUIRED:
elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
return "⚠️"
else:
return "⚪️"


class StructuredToolResult(BaseModel):
schema_version: str = "robusta:v1.0.0"
status: ToolResultStatus
status: StructuredToolResultStatus
error: Optional[str] = None
return_code: Optional[int] = None
data: Optional[Any] = None
@@ -261,7 +261,10 @@ def _apply_transformers(self, result: StructuredToolResult) -> StructuredToolRes
Returns:
The tool result with transformed data, or original result if transformation fails
"""
if not self._transformer_instances or result.status != ToolResultStatus.SUCCESS:
if (
not self._transformer_instances
or result.status != StructuredToolResultStatus.SUCCESS
):
return result

# Get the output string to transform
@@ -387,12 +390,14 @@ def _build_context(self, params):
context = {**params}
return context

def _get_status(self, return_code: int, raw_output: str) -> ToolResultStatus:
def _get_status(
self, return_code: int, raw_output: str
) -> StructuredToolResultStatus:
if return_code != 0:
return ToolResultStatus.ERROR
return StructuredToolResultStatus.ERROR
if raw_output == "":
return ToolResultStatus.NO_DATA
return ToolResultStatus.SUCCESS
return StructuredToolResultStatus.NO_DATA
return StructuredToolResultStatus.SUCCESS

def _invoke(
self, params: dict, user_approved: bool = False
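
The rename from `ToolResultStatus` to `StructuredToolResultStatus` is mechanical: call sites only swap the class name, and behaviour such as the colour and emoji helpers is unchanged. For example:

```python
from holmes.core.tools import StructuredToolResultStatus

status = StructuredToolResultStatus.NO_DATA
print(status.to_color())  # "white"  (fallback branch)
print(status.to_emoji())  # "⚪️"  (fallback branch)
```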
79 changes: 79 additions & 0 deletions holmes/core/tools_utils/data_types.py
@@ -0,0 +1,79 @@
import json
from typing import Optional
from pydantic import BaseModel

from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


class TruncationMetadata(BaseModel):
tool_call_id: str
start_index: int
end_index: int


class TruncationResult(BaseModel):
truncated_messages: list[dict]
truncations: list[TruncationMetadata]


def format_tool_result_data(tool_result: StructuredToolResult) -> str:
tool_response = tool_result.data
if isinstance(tool_result.data, str):
tool_response = tool_result.data
else:
try:
if isinstance(tool_result.data, BaseModel):
tool_response = tool_result.data.model_dump_json(indent=2)
else:
tool_response = json.dumps(tool_result.data, indent=2)
except Exception:
tool_response = str(tool_result.data)
if tool_result.status == StructuredToolResultStatus.ERROR:
tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
return tool_response


class ToolCallResult(BaseModel):
tool_call_id: str
tool_name: str
description: str
result: StructuredToolResult
size: Optional[int] = None

def as_tool_call_message(self):
content = format_tool_result_data(self.result)
if self.result.params:
content = (
f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
+ content
)
return {
"tool_call_id": self.tool_call_id,
"role": "tool",
"name": self.tool_name,
"content": content,
}

def as_tool_result_response(self):
result_dump = self.result.model_dump()
result_dump["data"] = self.result.get_stringified_data()

return {
"tool_call_id": self.tool_call_id,
"tool_name": self.tool_name,
"description": self.description,
"role": "tool",
"result": result_dump,
}

def as_streaming_tool_result_response(self):
result_dump = self.result.model_dump()
result_dump["data"] = self.result.get_stringified_data()

return {
"tool_call_id": self.tool_call_id,
"role": "tool",
"description": self.description,
"name": self.tool_name,
"result": result_dump,
}
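
For reference, a short usage example of the relocated helpers; the tool name, parameters, and log line below are illustrative values, not taken from the PR:

```python
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools_utils.data_types import ToolCallResult

tool_call = ToolCallResult(
    tool_call_id="call_1",
    tool_name="fetch_pod_logs",
    description="Fetch logs for pod my-pod",
    result=StructuredToolResult(
        status=StructuredToolResultStatus.SUCCESS,
        data={"lines": ["error: connection refused"]},
        params={"pod_name": "my-pod", "namespace": "default"},
    ),
)

# The conversation message prepends the params, then the JSON-serialised data.
print(tool_call.as_tool_call_message()["content"])
```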