Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5f9af5b
feat: add ability for tool to count tokens
nherment Sep 23, 2025
53945d7
feat: refactor tool.invoke() calls to use new context
nherment Sep 23, 2025
8c8d14b
feat: refactor tool.invoke() calls to use new context
nherment Sep 23, 2025
d9808a4
feat: refactor tool.invoke() calls to use new context
nherment Sep 23, 2025
b6ea5bf
fix test
nherment Sep 23, 2025
5b8e55a
feat: use context for new invoke arg
nherment Sep 23, 2025
80bfe84
fix test
nherment Sep 23, 2025
d05ac95
fix test
nherment Sep 23, 2025
94982af
fix test
nherment Sep 23, 2025
c23115d
fix test
nherment Sep 23, 2025
04a2a40
prometheus token count WIP
nherment Sep 24, 2025
3fa2fcf
prometheus token count
nherment Sep 24, 2025
d9f6702
feat: truncate logs based on token limit
nherment Sep 24, 2025
34a7bb7
fix: correct message format for tool token counting
nherment Sep 24, 2025
01543c0
fix: logs truncation logic
nherment Sep 24, 2025
3cb77fb
chore: address PR comments
nherment Sep 24, 2025
fb94fa9
improvements
nherment Oct 1, 2025
3187728
chore: address PR comments
nherment Oct 1, 2025
1279783
Merge branch 'master' into ROB-2136_link_tool_limit_to_global_truncat…
nherment Oct 1, 2025
515b33c
chore: address PR comments
nherment Oct 1, 2025
9d24e9b
fix: incorrect comment regarding context window tool size limit
nherment Oct 1, 2025
fe92ae3
chore: address PR comments
nherment Oct 1, 2025
b8eda3d
Merge branch 'master' into ROB-2136_link_tool_limit_to_global_truncat…
nherment Oct 1, 2025
faf71ba
chore: address PR comments
nherment Oct 1, 2025
150f090
chore: address PR comments
nherment Oct 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,13 @@
from holmes.core.resource_instruction import ResourceInstructions
from holmes.core.runbooks import RunbookManager
from holmes.core.safeguards import prevent_overly_repeated_tool_call
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools import (
StructuredToolResult,
StructuredToolResultStatus,
ToolInvokeContext,
)
from holmes.core.tools_utils.tool_context_window_limiter import (
get_max_token_count_for_single_tool,
prevent_overly_big_tool_response,
)
from holmes.plugins.prompts import load_and_render_prompt
Expand Down Expand Up @@ -622,9 +627,13 @@ def _directly_invoke_tool_call(
)

try:
tool_response = tool.invoke(
tool_params, tool_number=tool_number, user_approved=user_approved
invoke_context = ToolInvokeContext(
tool_number=tool_number,
user_approved=user_approved,
llm=self.llm,
max_token_count=get_max_token_count_for_single_tool(self.llm),
)
tool_response = tool.invoke(tool_params, context=invoke_context)
except Exception as e:
logging.error(
f"Tool call to {tool_name} failed with an Exception", exc_info=True
Expand Down
27 changes: 20 additions & 7 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
)
from rich.console import Console

from holmes.core.llm import LLM
from holmes.core.openai_formatting import format_tool_to_open_ai_standard
from holmes.plugins.prompts import load_and_render_prompt
from holmes.core.transformers import (
Expand Down Expand Up @@ -159,6 +160,15 @@ class ToolParameter(BaseModel):
items: Optional["ToolParameter"] = None # For array item schemas


class ToolInvokeContext(BaseModel):
    """Per-call context handed to a tool's ``invoke()`` / ``_invoke()``.

    Groups the values that accompany a single tool invocation into one
    object, so they travel together as the ``context`` argument instead of
    as separate keyword arguments.
    """

    # NOTE(review): presumably required because ``LLM`` is a plain project
    # class rather than a pydantic-native type -- confirm.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Optional sequence number of this tool call; None when not supplied.
    tool_number: Optional[int] = None
    # Approval flag forwarded from the caller; defaults to False.
    # NOTE(review): exact approval semantics are not visible here -- confirm.
    user_approved: bool = False
    # The LLM instance associated with this invocation (required field).
    llm: LLM
    # Optional token budget for this tool call; None when not supplied.
    # NOTE(review): whether None means "no limit" is not visible here -- confirm.
    max_token_count: Optional[int] = None


class Tool(ABC, BaseModel):
name: str
description: str
Expand Down Expand Up @@ -225,15 +235,14 @@ def get_openai_format(self, target_model: str):
def invoke(
self,
params: Dict,
tool_number: Optional[int] = None,
user_approved: bool = False,
context: ToolInvokeContext,
) -> StructuredToolResult:
tool_number_str = f"#{tool_number} " if tool_number else ""
tool_number_str = f"#{context.tool_number} " if context.tool_number else ""
logger.info(
f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
)
start_time = time.time()
result = self._invoke(params=params, user_approved=user_approved)
result = self._invoke(params=params, context=context)
result.icon_url = self.icon_url

# Apply transformers to the result
Expand All @@ -244,7 +253,7 @@ def invoke(
if hasattr(transformed_result, "get_stringified_data")
else str(transformed_result)
)
show_hint = f"/show {tool_number}" if tool_number else "/show"
show_hint = f"/show {context.tool_number}" if context.tool_number else "/show"
line_count = output_str.count("\n") + 1 if output_str else 0
logger.info(
f" [dim]Finished {tool_number_str}in {elapsed:.2f}s, output length: {len(output_str):,} characters ({line_count:,} lines) - {show_hint} to view contents[/dim]"
Expand Down Expand Up @@ -340,7 +349,9 @@ def _apply_transformers(self, result: StructuredToolResult) -> StructuredToolRes

@abstractmethod
def _invoke(
self, params: dict, user_approved: bool = False
self,
params: dict,
context: ToolInvokeContext,
) -> StructuredToolResult:
"""
params: the tool params
Expand Down Expand Up @@ -400,7 +411,9 @@ def _get_status(
return StructuredToolResultStatus.SUCCESS

def _invoke(
self, params: dict, user_approved: bool = False
self,
params: dict,
context: ToolInvokeContext,
) -> StructuredToolResult:
if self.command is not None:
raw_output, return_code, invocation = self.__invoke_command(params)
Expand Down
21 changes: 13 additions & 8 deletions holmes/core/tools_utils/tool_context_window_limiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,24 @@
from holmes.utils import sentry_helper


def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
def get_max_token_count_for_single_tool(llm: LLM) -> int:
context_window_size = llm.get_context_window_size()

if (
tool_call_result.result.status == StructuredToolResultStatus.SUCCESS
and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
):
message = tool_call_result.as_tool_call_message()
return int(context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100)
else:
return context_window_size


def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
if tool_call_result.result.status == StructuredToolResultStatus.SUCCESS:
max_tokens_allowed = get_max_token_count_for_single_tool(llm)

message = tool_call_result.as_tool_call_message()
messages_token = llm.count_tokens_for_message(messages=[message])
context_window_size = llm.get_context_window_size()
max_tokens_allowed: int = int(
context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
)

if messages_token > max_tokens_allowed:
relative_pct = (
Expand Down
18 changes: 11 additions & 7 deletions holmes/core/tools_utils/tool_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
StructuredToolResultStatus,
Toolset,
ToolsetStatusEnum,
ToolInvokeContext,
)
from holmes.core.llm import LLM
from holmes.core.tools_utils.toolset_utils import filter_out_default_logging_toolset


class ToolExecutor:
def __init__(self, toolsets: List[Toolset]):
def __init__(self, toolsets: List[Toolset], llm: Optional[LLM] = None):
# TODO: expose function for this instead of callers accessing directly
self.toolsets = toolsets
self.llm = llm

enabled_toolsets: list[Toolset] = list(
filter(
Expand Down Expand Up @@ -46,16 +49,17 @@ def __init__(self, toolsets: List[Toolset]):
)
self.tools_by_name[tool.name] = tool

def invoke(self, tool_name: str, params: dict) -> StructuredToolResult:
def invoke(
self, tool_name: str, params: dict, context: ToolInvokeContext
) -> StructuredToolResult:
tool = self.get_tool_by_name(tool_name)
return (
tool.invoke(params)
if tool
else StructuredToolResult(
if not tool:
return StructuredToolResult(
status=StructuredToolResultStatus.ERROR,
error=f"Could not find tool named {tool_name}",
)
)

return tool.invoke(params, context)

def get_tool_by_name(self, name: str) -> Optional[Tool]:
if name in self.tools_by_name:
Expand Down
25 changes: 7 additions & 18 deletions holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from holmes.core.tools import (
CallablePrerequisite,
Tool,
ToolInvokeContext,
ToolParameter,
Toolset,
ToolsetTag,
Expand Down Expand Up @@ -118,9 +119,7 @@ def get_parameterized_one_liner(self, params) -> str:
project_id = self.toolset.config.get("project_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Alerts ({project_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = "https://cloud.mongodb.com/api/atlas/v2/groups/{project_id}/alerts".format(
project_id=self.toolset.config.get("project_id")
Expand All @@ -145,9 +144,7 @@ def get_parameterized_one_liner(self, params) -> str:
project_id = self.toolset.config.get("project_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Processes ({project_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = "https://cloud.mongodb.com/api/atlas/v2/groups/{project_id}/processes".format(
project_id=self.toolset.config.get("project_id")
Expand Down Expand Up @@ -180,9 +177,7 @@ def get_parameterized_one_liner(self, params) -> str:
process_id = params.get("process_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Slow Queries ({process_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = self.url.format(
project_id=self.toolset.config.get("project_id"),
Expand All @@ -209,9 +204,7 @@ def get_parameterized_one_liner(self, params) -> str:
project_id = self.toolset.config.get("project_id", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Project Events ({project_id})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
params.update({"itemsPerPage": 500})
try:
now_utc = datetime.now(timezone.utc)
Expand Down Expand Up @@ -268,9 +261,7 @@ def get_parameterized_one_liner(self, params) -> str:
hostname = params.get("hostName", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Host Logs ({hostname})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
one_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
try:
url = self.url.format(
Expand Down Expand Up @@ -324,9 +315,7 @@ def get_parameterized_one_liner(self, params) -> str:
event_type = params.get("eventType", "")
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Event Details ({event_type})"

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
url = self.url.format(projectId=self.toolset.config.get("project_id"))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from holmes.core.tools import (
StructuredToolResult,
ToolInvokeContext,
ToolParameter,
StructuredToolResultStatus,
)
Expand Down Expand Up @@ -217,9 +218,7 @@ def _build_connection_failures_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
# Get configuration
db_config = self.toolset.database_config()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from holmes.core.tools import (
StructuredToolResult,
ToolInvokeContext,
ToolParameter,
StructuredToolResultStatus,
)
Expand Down Expand Up @@ -155,9 +156,7 @@ def _build_connection_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
hours_back = params.get("hours_back", 2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from typing import Dict
from datetime import datetime, timezone

from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools import (
StructuredToolResult,
StructuredToolResultStatus,
ToolInvokeContext,
)
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
BaseAzureSQLTool,
BaseAzureSQLToolset,
Expand Down Expand Up @@ -131,9 +135,7 @@ def _build_health_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
db_config = self.toolset.database_config()
client = self.toolset.api_client()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from typing import Any, Dict, List, Tuple, cast
from datetime import datetime, timezone

from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools import (
StructuredToolResult,
StructuredToolResultStatus,
ToolInvokeContext,
)
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
BaseAzureSQLTool,
BaseAzureSQLToolset,
Expand Down Expand Up @@ -192,9 +196,7 @@ def _build_performance_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
db_config = self.toolset.database_config()
client = self.toolset.api_client()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from holmes.core.tools import (
StructuredToolResult,
ToolInvokeContext,
ToolParameter,
StructuredToolResultStatus,
)
Expand Down Expand Up @@ -253,9 +254,7 @@ def _build_storage_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
hours_back = params.get("hours_back", 24)
top_tables = params.get("top_tables", 20)
Expand Down
10 changes: 6 additions & 4 deletions holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from typing import Dict
from datetime import datetime, timezone

from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.core.tools import (
StructuredToolResult,
StructuredToolResultStatus,
ToolInvokeContext,
)
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
BaseAzureSQLTool,
BaseAzureSQLToolset,
Expand Down Expand Up @@ -147,9 +151,7 @@ def _build_alerts_report(

return "\n".join(report_sections)

def _invoke(
self, params: dict, user_approved: bool = False
) -> StructuredToolResult:
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
try:
db_config = self.toolset.database_config()
api_client = self.toolset.api_client()
Expand Down
Loading
Loading