Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5f9af5b
feat: add ability for tool to count tokens
nherment Sep 23, 2025
53945d7
feat: refactor tool.invoke() calls to use new context
nherment Sep 23, 2025
8c8d14b
feat: refactor tool.invoke() calls to use new context
nherment Sep 23, 2025
d9808a4
feat: refactor tool.invoke() calls to use new context
nherment Sep 23, 2025
b6ea5bf
fix test
nherment Sep 23, 2025
5b8e55a
feat: use context for new invoke arg
nherment Sep 23, 2025
80bfe84
fix test
nherment Sep 23, 2025
d05ac95
fix test
nherment Sep 23, 2025
94982af
fix test
nherment Sep 23, 2025
c23115d
fix test
nherment Sep 23, 2025
04a2a40
prometheus token count WIP
nherment Sep 24, 2025
3fa2fcf
prometheus token count
nherment Sep 24, 2025
d9f6702
feat: truncate logs based on token limit
nherment Sep 24, 2025
34a7bb7
fix: correct message format for tool token counting
nherment Sep 24, 2025
01543c0
fix: logs truncation logic
nherment Sep 24, 2025
3cb77fb
chore: address PR comments
nherment Sep 24, 2025
fb94fa9
improvements
nherment Oct 1, 2025
3187728
chore: address PR comments
nherment Oct 1, 2025
1279783
Merge branch 'master' into ROB-2136_link_tool_limit_to_global_truncat…
nherment Oct 1, 2025
515b33c
chore: address PR comments
nherment Oct 1, 2025
9d24e9b
fix: incorrect comment regarding context window tool size limit
nherment Oct 1, 2025
fe92ae3
chore: address PR comments
nherment Oct 1, 2025
b8eda3d
Merge branch 'master' into ROB-2136_link_tool_limit_to_global_truncat…
nherment Oct 1, 2025
faf71ba
chore: address PR comments
nherment Oct 1, 2025
150f090
chore: address PR comments
nherment Oct 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions holmes/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,11 @@ class ToolCallResult(BaseModel):
size: Optional[int] = None

def as_tool_call_message(self):
content = format_tool_result_data(self.result)
if self.result.params:
content = (
f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
+ content
)
return {
"tool_call_id": self.tool_call_id,
"role": "tool",
"name": self.tool_name,
"content": content,
"content": format_tool_result_data(self.result),
}

def as_tool_result_response(self):
Expand Down Expand Up @@ -80,6 +74,12 @@ def format_tool_result_data(tool_result: StructuredToolResult) -> str:
tool_response = str(tool_result.data)
if tool_result.status == StructuredToolResultStatus.ERROR:
tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()

if tool_result.params:
tool_response = (
f"Params used for the tool call: {json.dumps(tool_result.params)}. The tool call output follows on the next line.\n"
+ tool_response
)
return tool_response


Expand Down
15 changes: 12 additions & 3 deletions holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,13 @@
from holmes.core.resource_instruction import ResourceInstructions
from holmes.core.runbooks import RunbookManager
from holmes.core.safeguards import prevent_overly_repeated_tool_call
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools import (
StructuredToolResult,
StructuredToolResultStatus,
ToolInvokeContext,
)
from holmes.core.tools_utils.tool_context_window_limiter import (
get_max_token_count_for_single_tool,
prevent_overly_big_tool_response,
)
from holmes.plugins.prompts import load_and_render_prompt
Expand Down Expand Up @@ -622,9 +627,13 @@ def _directly_invoke_tool_call(
)

try:
tool_response = tool.invoke(
tool_params, tool_number=tool_number, user_approved=user_approved
invoke_context = ToolInvokeContext(
tool_number=tool_number,
user_approved=user_approved,
llm=self.llm,
max_token_count=get_max_token_count_for_single_tool(self.llm),
)
tool_response = tool.invoke(tool_params, context=invoke_context)
except Exception as e:
logging.error(
f"Tool call to {tool_name} failed with an Exception", exc_info=True
Expand Down
27 changes: 20 additions & 7 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
)
from rich.console import Console

from holmes.core.llm import LLM
from holmes.core.openai_formatting import format_tool_to_open_ai_standard
from holmes.plugins.prompts import load_and_render_prompt
from holmes.core.transformers import (
Expand Down Expand Up @@ -159,6 +160,15 @@ class ToolParameter(BaseModel):
items: Optional["ToolParameter"] = None # For array item schemas


class ToolInvokeContext(BaseModel):
    """Per-invocation settings bundled into a single object for a tool call.

    An instance is passed as the ``context`` argument when a tool is invoked,
    replacing the separate ``tool_number`` / ``user_approved`` arguments.
    """

    # Allow field types pydantic cannot validate on its own; presumably needed
    # because LLM is not a pydantic model -- TODO confirm.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Number identifying this tool call; optional, defaults to None.
    tool_number: Optional[int] = None
    # Approval flag for the call; defaults to False.
    # NOTE(review): the exact approval semantics are not visible here -- confirm.
    user_approved: bool = False
    # LLM instance made available to the tool (required).
    llm: LLM
    # Token budget for the tool (required). Presumably the maximum number of
    # tokens a single tool response may use -- verify against callers.
    max_token_count: int


class Tool(ABC, BaseModel):
name: str
description: str
Expand Down Expand Up @@ -225,15 +235,14 @@ def get_openai_format(self, target_model: str):
def invoke(
self,
params: Dict,
tool_number: Optional[int] = None,
user_approved: bool = False,
context: ToolInvokeContext,
) -> StructuredToolResult:
tool_number_str = f"#{tool_number} " if tool_number else ""
tool_number_str = f"#{context.tool_number} " if context.tool_number else ""
logger.info(
f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
)
start_time = time.time()
result = self._invoke(params=params, user_approved=user_approved)
result = self._invoke(params=params, context=context)
result.icon_url = self.icon_url

# Apply transformers to the result
Expand All @@ -244,7 +253,7 @@ def invoke(
if hasattr(transformed_result, "get_stringified_data")
else str(transformed_result)
)
show_hint = f"/show {tool_number}" if tool_number else "/show"
show_hint = f"/show {context.tool_number}" if context.tool_number else "/show"
line_count = output_str.count("\n") + 1 if output_str else 0
logger.info(
f" [dim]Finished {tool_number_str}in {elapsed:.2f}s, output length: {len(output_str):,} characters ({line_count:,} lines) - {show_hint} to view contents[/dim]"
Expand Down Expand Up @@ -340,7 +349,9 @@ def _apply_transformers(self, result: StructuredToolResult) -> StructuredToolRes

@abstractmethod
def _invoke(
self, params: dict, user_approved: bool = False
self,
params: dict,
context: ToolInvokeContext,
) -> StructuredToolResult:
"""
params: the tool params
Expand Down Expand Up @@ -400,7 +411,9 @@ def _get_status(
return StructuredToolResultStatus.SUCCESS

def _invoke(
self, params: dict, user_approved: bool = False
self,
params: dict,
context: ToolInvokeContext,
) -> StructuredToolResult:
if self.command is not None:
raw_output, return_code, invocation = self.__invoke_command(params)
Expand Down
13 changes: 13 additions & 0 deletions holmes/core/tools_utils/token_counting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from holmes.core.llm import LLM
from holmes.core.models import format_tool_result_data
from holmes.core.tools import StructuredToolResult


def count_tool_response_tokens(
    llm: LLM, structured_tool_result: StructuredToolResult
) -> int:
    """Count the tokens a tool result occupies once rendered as a tool message.

    Args:
        llm: Object whose ``count_tokens_for_message`` performs the counting.
        structured_tool_result: Tool result to render and measure.

    Returns:
        The token count reported by ``llm`` for a single ``"tool"`` role
        message whose content is the formatted tool result.
    """
    # Render the result with the shared formatter so the measured text is
    # produced by the same function used elsewhere for tool result content.
    rendered_content = format_tool_result_data(structured_tool_result)
    tool_message = dict(role="tool", content=rendered_content)
    return llm.count_tokens_for_message([tool_message])
66 changes: 44 additions & 22 deletions holmes/core/tools_utils/tool_context_window_limiter.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,55 @@
from typing import Optional
from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
from holmes.core.llm import LLM
from holmes.core.tools import StructuredToolResultStatus
from holmes.core.models import ToolCallResult
from holmes.utils import sentry_helper


def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
    """Return the token budget for a percentage of the LLM context window.

    Args:
        percent_of_total_context_window: Share of the context window to
            allocate, as a percentage. Only values in the range (0, 100]
            are applied.
        llm: Object whose ``get_context_window_size()`` gives the window size.

    Returns:
        The floored share of the context window when the percentage is in
        (0, 100]; otherwise the full context window size, unchanged.
    """
    window_tokens = llm.get_context_window_size()

    # Out-of-range percentages are not applied: the whole window is the budget.
    if not 0 < percent_of_total_context_window <= 100:
        return window_tokens

    return int(window_tokens * percent_of_total_context_window // 100)


def get_max_token_count_for_single_tool(llm: LLM) -> int:
    """Return the token budget allotted to a single tool for ``llm``.

    The budget is the ``TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT`` percentage of
    the LLM context window, as computed by ``get_pct_token_count``.
    """
    single_tool_budget = get_pct_token_count(
        llm=llm,
        percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
    )
    return single_tool_budget


def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
if (
tool_call_result.result.status == StructuredToolResultStatus.SUCCESS
and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
):
message = tool_call_result.as_tool_call_message()

messages_token = llm.count_tokens_for_message(messages=[message])
context_window_size = llm.get_context_window_size()
max_tokens_allowed: int = int(
context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
max_tokens_allowed = get_max_token_count_for_single_tool(llm)

message = tool_call_result.as_tool_call_message()
messages_token = llm.count_tokens_for_message(messages=[message])

if messages_token > max_tokens_allowed:
relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100

error_message: Optional[str] = (
f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
)

if messages_token > max_tokens_allowed:
relative_pct = (
(messages_token - max_tokens_allowed) / messages_token
) * 100
error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
tool_call_result.result.status = StructuredToolResultStatus.ERROR
tool_call_result.result.data = None
tool_call_result.result.error = error_message

sentry_helper.capture_toolcall_contains_too_many_tokens(
tool_call_result, messages_token, max_tokens_allowed
if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
error_message = None
# tool_call_result.result.data is set to None below which is expected to fix the issue
elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
original_error = (
tool_call_result.result.error
or tool_call_result.result.data
or "Unknown error"
)
truncated_error = str(original_error)[:100]
error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"

tool_call_result.result.status = StructuredToolResultStatus.ERROR
tool_call_result.result.data = None
tool_call_result.result.error = error_message

sentry_helper.capture_toolcall_contains_too_many_tokens(
tool_call_result, messages_token, max_tokens_allowed
)
17 changes: 11 additions & 6 deletions holmes/core/tools_utils/tool_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
StructuredToolResultStatus,
Toolset,
ToolsetStatusEnum,
ToolInvokeContext,
)
from holmes.core.tools_utils.toolset_utils import filter_out_default_logging_toolset

Expand Down Expand Up @@ -46,16 +47,20 @@ def __init__(self, toolsets: List[Toolset]):
)
self.tools_by_name[tool.name] = tool

def invoke(self, tool_name: str, params: dict) -> StructuredToolResult:
def invoke(
self, tool_name: str, params: dict, context: ToolInvokeContext
) -> StructuredToolResult:
"""TODO: remove this function as it seems unused.
We call tool_executor.get_tool_by_name() and then tool.invoke() directly instead of this invoke function
"""
tool = self.get_tool_by_name(tool_name)
return (
tool.invoke(params)
if tool
else StructuredToolResult(
if not tool:
return StructuredToolResult(
status=StructuredToolResultStatus.ERROR,
error=f"Could not find tool named {tool_name}",
)
)

return tool.invoke(params, context)

def get_tool_by_name(self, name: str) -> Optional[Tool]:
if name in self.tools_by_name:
Expand Down
25 changes: 7 additions & 18 deletions holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from holmes.core.tools import (
CallablePrerequisite,
Tool,
ToolInvokeContext,
ToolParameter,
Toolset,
ToolsetTag,
Expand Down Expand Up @@ -118,9 +119,7 @@ def get_parameterized_one_liner(self, params) -> str:
project_id = self.toolset.config.get("project_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Alerts ({project_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = "https://cloud.mongodb.com/api/atlas/v2/groups/{project_id}/alerts".format(
project_id=self.toolset.config.get("project_id")
Expand All @@ -145,9 +144,7 @@ def get_parameterized_one_liner(self, params) -> str:
project_id = self.toolset.config.get("project_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Processes ({project_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = "https://cloud.mongodb.com/api/atlas/v2/groups/{project_id}/processes".format(
project_id=self.toolset.config.get("project_id")
Expand Down Expand Up @@ -180,9 +177,7 @@ def get_parameterized_one_liner(self, params) -> str:
process_id = params.get("process_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Slow Queries ({process_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = self.url.format(
project_id=self.toolset.config.get("project_id"),
Expand All @@ -209,9 +204,7 @@ def get_parameterized_one_liner(self, params) -> str:
project_id = self.toolset.config.get("project_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Events ({project_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
params.update({"itemsPerPage": 500})
try:
now_utc = datetime.now(timezone.utc)
Expand Down Expand Up @@ -268,9 +261,7 @@ def get_parameterized_one_liner(self, params) -> str:
hostname = params.get("hostName", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Host Logs ({hostname})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
one_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
try:
url = self.url.format(
Expand Down Expand Up @@ -324,9 +315,7 @@ def get_parameterized_one_liner(self, params) -> str:
event_type = params.get("eventType", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Event Details ({event_type})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = self.url.format(projectId=self.toolset.config.get("project_id"))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from holmes.core.tools import (
StructuredToolResult,
ToolInvokeContext,
ToolParameter,
StructuredToolResultStatus,
)
Expand Down Expand Up @@ -217,9 +218,7 @@ def _build_connection_failures_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
# Get configuration
db_config = self.toolset.database_config()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from holmes.core.tools import (
StructuredToolResult,
ToolInvokeContext,
ToolParameter,
StructuredToolResultStatus,
)
Expand Down Expand Up @@ -155,9 +156,7 @@ def _build_connection_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
hours_back = params.get("hours_back", 2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from typing import Dict
from datetime import datetime, timezone

from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools import (
StructuredToolResult,
StructuredToolResultStatus,
ToolInvokeContext,
)
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
BaseAzureSQLTool,
BaseAzureSQLToolset,
Expand Down Expand Up @@ -131,9 +135,7 @@ def _build_health_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
db_config = self.toolset.database_config()
client = self.toolset.api_client()
Expand Down
Loading
Loading