Skip to content

Commit 4504acf

Browse files
authored
Merge branch 'master' into prometheus-context-window-fixes
2 parents 8a9aa62 + 7d3ec48 commit 4504acf

File tree

78 files changed

+1024
-644
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

78 files changed

+1024
-644
lines changed

holmes/common/env_vars.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,10 @@ def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
7474
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
7575

7676
MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 200))
77+
78+
# Limit each tool response to N% of the total context window.
79+
# Number between 0 and 100
80+
# Setting to either 0 or any number above 100 disables the logic that limits tool response size
81+
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
82+
os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 10)
83+
)

holmes/core/safeguards.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
77
from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
8-
from holmes.core.tools import StructuredToolResult, ToolResultStatus
8+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
99
from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams
1010

1111

@@ -39,7 +39,7 @@ def _has_previous_unfiltered_pod_logs_call(
3939
result = tool_call.get("result", {})
4040
if (
4141
tool_call.get("tool_name") == POD_LOGGING_TOOL_NAME
42-
and result.get("status") == ToolResultStatus.NO_DATA
42+
and result.get("status") == StructuredToolResultStatus.NO_DATA
4343
and result.get("params")
4444
):
4545
params = FetchPodLogsParams(**result.get("params"))
@@ -94,7 +94,7 @@ def prevent_overly_repeated_tool_call(
9494
For example if Holmes checks if a resource is deployed, runs a command to deploy it and then checks again if it has deployed properly.
9595
"""
9696
return StructuredToolResult(
97-
status=ToolResultStatus.ERROR,
97+
status=StructuredToolResultStatus.ERROR,
9898
error=(
9999
"Refusing to run this tool call because it has already been called during this session with the exact same parameters.\n"
100100
"Move on with your investigation to a different tool or change the parameter values."
@@ -106,7 +106,7 @@ def prevent_overly_repeated_tool_call(
106106
tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
107107
):
108108
return StructuredToolResult(
109-
status=ToolResultStatus.ERROR,
109+
status=StructuredToolResultStatus.ERROR,
110110
error=(
111111
f"Refusing to run this tool call because the exact same {POD_LOGGING_TOOL_NAME} tool call without filter has already run and returned no data.\n"
112112
"This tool call would also have returned no data.\n"

holmes/core/tool_calling_llm.py

Lines changed: 21 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,22 @@
3232
from holmes.core.resource_instruction import ResourceInstructions
3333
from holmes.core.runbooks import RunbookManager
3434
from holmes.core.safeguards import prevent_overly_repeated_tool_call
35-
from holmes.core.tools import StructuredToolResult, ToolResultStatus
35+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
36+
from holmes.core.tools_utils.tool_context_window_limiter import (
37+
prevent_overly_big_tool_response,
38+
)
3639
from holmes.plugins.prompts import load_and_render_prompt
3740
from holmes.utils.global_instructions import (
3841
Instructions,
3942
add_global_instructions_to_user_prompt,
4043
)
4144
from holmes.utils.tags import format_tags_in_string, parse_messages_tags
4245
from holmes.core.tools_utils.tool_executor import ToolExecutor
46+
from holmes.core.tools_utils.data_types import (
47+
TruncationResult,
48+
ToolCallResult,
49+
TruncationMetadata,
50+
)
4351
from holmes.core.tracing import DummySpan
4452
from holmes.utils.colors import AI_COLOR
4553
from holmes.utils.stream import StreamEvents, StreamMessage
@@ -119,34 +127,6 @@ def _process_cost_info(
119127
logging.debug(f"Could not extract cost information: {e}")
120128

121129

122-
class TruncationMetadata(BaseModel):
123-
tool_call_id: str
124-
start_index: int
125-
end_index: int
126-
127-
128-
class TruncationResult(BaseModel):
129-
truncated_messages: List[dict]
130-
truncations: List[TruncationMetadata]
131-
132-
133-
def format_tool_result_data(tool_result: StructuredToolResult) -> str:
134-
tool_response = tool_result.data
135-
if isinstance(tool_result.data, str):
136-
tool_response = tool_result.data
137-
else:
138-
try:
139-
if isinstance(tool_result.data, BaseModel):
140-
tool_response = tool_result.data.model_dump_json(indent=2)
141-
else:
142-
tool_response = json.dumps(tool_result.data, indent=2)
143-
except Exception:
144-
tool_response = str(tool_result.data)
145-
if tool_result.status == ToolResultStatus.ERROR:
146-
tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
147-
return tool_response
148-
149-
150130
# TODO: likely bug: the token count ignores each message's 'role' field and the surrounding JSON structure (e.g. '{...}'), so it undercounts
151131
# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
152132
# We should fix this in the future
@@ -249,52 +229,6 @@ def truncate_messages_to_fit_context(
249229
return TruncationResult(truncated_messages=messages, truncations=truncations)
250230

251231

252-
class ToolCallResult(BaseModel):
253-
tool_call_id: str
254-
tool_name: str
255-
description: str
256-
result: StructuredToolResult
257-
size: Optional[int] = None
258-
259-
def as_tool_call_message(self):
260-
content = format_tool_result_data(self.result)
261-
if self.result.params:
262-
content = (
263-
f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
264-
+ content
265-
)
266-
return {
267-
"tool_call_id": self.tool_call_id,
268-
"role": "tool",
269-
"name": self.tool_name,
270-
"content": content,
271-
}
272-
273-
def as_tool_result_response(self):
274-
result_dump = self.result.model_dump()
275-
result_dump["data"] = self.result.get_stringified_data()
276-
277-
return {
278-
"tool_call_id": self.tool_call_id,
279-
"tool_name": self.tool_name,
280-
"description": self.description,
281-
"role": "tool",
282-
"result": result_dump,
283-
}
284-
285-
def as_streaming_tool_result_response(self):
286-
result_dump = self.result.model_dump()
287-
result_dump["data"] = self.result.get_stringified_data()
288-
289-
return {
290-
"tool_call_id": self.tool_call_id,
291-
"role": "tool",
292-
"description": self.description,
293-
"name": self.tool_name,
294-
"result": result_dump,
295-
}
296-
297-
298232
class LLMResult(LLMCosts):
299233
tool_calls: Optional[List[ToolCallResult]] = None
300234
result: Optional[str] = None
@@ -539,7 +473,7 @@ def call( # type: ignore
539473

540474
if (
541475
tool_call_result.result.status
542-
== ToolResultStatus.APPROVAL_REQUIRED
476+
== StructuredToolResultStatus.APPROVAL_REQUIRED
543477
):
544478
with trace_span.start_span(type="tool") as tool_span:
545479
tool_call_result = self._handle_tool_call_approval(
@@ -577,7 +511,7 @@ def _directly_invoke_tool_call(
577511
f"Skipping tool execution for {tool_name}: args: {tool_params}"
578512
)
579513
return StructuredToolResult(
580-
status=ToolResultStatus.ERROR,
514+
status=StructuredToolResultStatus.ERROR,
581515
error=f"Failed to find tool {tool_name}",
582516
params=tool_params,
583517
)
@@ -591,7 +525,7 @@ def _directly_invoke_tool_call(
591525
f"Tool call to {tool_name} failed with an Exception", exc_info=True
592526
)
593527
tool_response = StructuredToolResult(
594-
status=ToolResultStatus.ERROR,
528+
status=StructuredToolResultStatus.ERROR,
595529
error=f"Tool call failed: {e}",
596530
params=tool_params,
597531
)
@@ -633,7 +567,7 @@ def _get_tool_call_result(
633567
f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
634568
)
635569
tool_response = StructuredToolResult(
636-
status=ToolResultStatus.SUCCESS,
570+
status=StructuredToolResultStatus.SUCCESS,
637571
data=tool_response,
638572
params=tool_params,
639573
)
@@ -683,7 +617,7 @@ def _invoke_llm_tool_call(
683617
tool_name=tool_name,
684618
description="NA",
685619
result=StructuredToolResult(
686-
status=ToolResultStatus.ERROR,
620+
status=StructuredToolResultStatus.ERROR,
687621
error="Custom tool calls are not supported",
688622
params=None,
689623
),
@@ -699,6 +633,11 @@ def _invoke_llm_tool_call(
699633
previous_tool_calls=previous_tool_calls,
700634
tool_number=tool_number,
701635
)
636+
637+
prevent_overly_big_tool_response(
638+
tool_call_result=tool_call_result, llm=self.llm
639+
)
640+
702641
ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
703642
return tool_call_result
704643

@@ -720,7 +659,7 @@ def _handle_tool_call_approval(
720659

721660
# If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
722661
if not self.approval_callback:
723-
tool_call_result.result.status = ToolResultStatus.ERROR
662+
tool_call_result.result.status = StructuredToolResultStatus.ERROR
724663
return tool_call_result
725664

726665
# Get approval from user
@@ -740,7 +679,7 @@ def _handle_tool_call_approval(
740679
else:
741680
# User denied - update to error
742681
feedback_text = f" User feedback: {feedback}" if feedback else ""
743-
tool_call_result.result.status = ToolResultStatus.ERROR
682+
tool_call_result.result.status = StructuredToolResultStatus.ERROR
744683
tool_call_result.result.error = (
745684
f"User denied command execution.{feedback_text}"
746685
)
@@ -952,7 +891,6 @@ def call_stream(
952891

953892
for future in concurrent.futures.as_completed(futures):
954893
tool_call_result: ToolCallResult = future.result()
955-
956894
tool_calls.append(tool_call_result.as_tool_result_response())
957895
messages.append(tool_call_result.as_tool_call_message())
958896

holmes/core/tools.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,36 +48,36 @@
4848
logger = logging.getLogger(__name__)
4949

5050

51-
class ToolResultStatus(str, Enum):
51+
class StructuredToolResultStatus(str, Enum):
5252
SUCCESS = "success"
5353
ERROR = "error"
5454
NO_DATA = "no_data"
5555
APPROVAL_REQUIRED = "approval_required"
5656

5757
def to_color(self) -> str:
58-
if self == ToolResultStatus.SUCCESS:
58+
if self == StructuredToolResultStatus.SUCCESS:
5959
return "green"
60-
elif self == ToolResultStatus.ERROR:
60+
elif self == StructuredToolResultStatus.ERROR:
6161
return "red"
62-
elif self == ToolResultStatus.APPROVAL_REQUIRED:
62+
elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
6363
return "yellow"
6464
else:
6565
return "white"
6666

6767
def to_emoji(self) -> str:
68-
if self == ToolResultStatus.SUCCESS:
68+
if self == StructuredToolResultStatus.SUCCESS:
6969
return "✔"
70-
elif self == ToolResultStatus.ERROR:
70+
elif self == StructuredToolResultStatus.ERROR:
7171
return "❌"
72-
elif self == ToolResultStatus.APPROVAL_REQUIRED:
72+
elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
7373
return "⚠️"
7474
else:
7575
return "⚪️"
7676

7777

7878
class StructuredToolResult(BaseModel):
7979
schema_version: str = "robusta:v1.0.0"
80-
status: ToolResultStatus
80+
status: StructuredToolResultStatus
8181
error: Optional[str] = None
8282
return_code: Optional[int] = None
8383
data: Optional[Any] = None
@@ -261,7 +261,10 @@ def _apply_transformers(self, result: StructuredToolResult) -> StructuredToolRes
261261
Returns:
262262
The tool result with transformed data, or original result if transformation fails
263263
"""
264-
if not self._transformer_instances or result.status != ToolResultStatus.SUCCESS:
264+
if (
265+
not self._transformer_instances
266+
or result.status != StructuredToolResultStatus.SUCCESS
267+
):
265268
return result
266269

267270
# Get the output string to transform
@@ -387,12 +390,14 @@ def _build_context(self, params):
387390
context = {**params}
388391
return context
389392

390-
def _get_status(self, return_code: int, raw_output: str) -> ToolResultStatus:
393+
def _get_status(
394+
self, return_code: int, raw_output: str
395+
) -> StructuredToolResultStatus:
391396
if return_code != 0:
392-
return ToolResultStatus.ERROR
397+
return StructuredToolResultStatus.ERROR
393398
if raw_output == "":
394-
return ToolResultStatus.NO_DATA
395-
return ToolResultStatus.SUCCESS
399+
return StructuredToolResultStatus.NO_DATA
400+
return StructuredToolResultStatus.SUCCESS
396401

397402
def _invoke(
398403
self, params: dict, user_approved: bool = False
holmes/core/tools_utils/data_types.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import json
2+
from typing import Optional
3+
from pydantic import BaseModel
4+
5+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
6+
7+
8+
class TruncationMetadata(BaseModel):
9+
tool_call_id: str
10+
start_index: int
11+
end_index: int
12+
13+
14+
class TruncationResult(BaseModel):
15+
truncated_messages: list[dict]
16+
truncations: list[TruncationMetadata]
17+
18+
19+
def format_tool_result_data(tool_result: StructuredToolResult) -> str:
20+
tool_response = tool_result.data
21+
if isinstance(tool_result.data, str):
22+
tool_response = tool_result.data
23+
else:
24+
try:
25+
if isinstance(tool_result.data, BaseModel):
26+
tool_response = tool_result.data.model_dump_json(indent=2)
27+
else:
28+
tool_response = json.dumps(tool_result.data, indent=2)
29+
except Exception:
30+
tool_response = str(tool_result.data)
31+
if tool_result.status == StructuredToolResultStatus.ERROR:
32+
tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
33+
return tool_response
34+
35+
36+
class ToolCallResult(BaseModel):
37+
tool_call_id: str
38+
tool_name: str
39+
description: str
40+
result: StructuredToolResult
41+
size: Optional[int] = None
42+
43+
def as_tool_call_message(self):
44+
content = format_tool_result_data(self.result)
45+
if self.result.params:
46+
content = (
47+
f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
48+
+ content
49+
)
50+
return {
51+
"tool_call_id": self.tool_call_id,
52+
"role": "tool",
53+
"name": self.tool_name,
54+
"content": content,
55+
}
56+
57+
def as_tool_result_response(self):
58+
result_dump = self.result.model_dump()
59+
result_dump["data"] = self.result.get_stringified_data()
60+
61+
return {
62+
"tool_call_id": self.tool_call_id,
63+
"tool_name": self.tool_name,
64+
"description": self.description,
65+
"role": "tool",
66+
"result": result_dump,
67+
}
68+
69+
def as_streaming_tool_result_response(self):
70+
result_dump = self.result.model_dump()
71+
result_dump["data"] = self.result.get_stringified_data()
72+
73+
return {
74+
"tool_call_id": self.tool_call_id,
75+
"role": "tool",
76+
"description": self.description,
77+
"name": self.tool_name,
78+
"result": result_dump,
79+
}

0 commit comments

Comments
 (0)