feat(anthropic): dynamic mapping of Max Tokens for Anthropic #31946

Merged · 15 commits · Aug 15, 2025

Changes from all commits
libs/partners/anthropic/langchain_anthropic/chat_models.py (37 additions, 2 deletions)

@@ -7,7 +7,7 @@
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from functools import cached_property
 from operator import itemgetter
-from typing import Any, Callable, Literal, Optional, Union, cast
+from typing import Any, Callable, Final, Literal, Optional, Union, cast

 import anthropic
 from langchain_core._api import beta, deprecated
@@ -61,6 +61,32 @@
 }


+_MODEL_DEFAULT_MAX_OUTPUT_TOKENS: Final[dict[str, int]] = {
+    "claude-opus-4-1": 32000,
+    "claude-opus-4": 32000,
+    "claude-sonnet-4": 64000,
+    "claude-3-7-sonnet": 64000,
+    "claude-3-5-sonnet": 8192,
+    "claude-3-5-haiku": 8192,
+    "claude-3-haiku": 4096,
+}
+_FALLBACK_MAX_OUTPUT_TOKENS: Final[int] = 4096
+
+
+def _default_max_tokens_for(model: str | None) -> int:
+    """Return the default max output tokens for an Anthropic model (with fallback).
+
+    The per-model max output token limits are listed at:
+    https://docs.anthropic.com/en/docs/about-claude/models/overview#model-comparison-table
+    """
+    if not model:
+        return _FALLBACK_MAX_OUTPUT_TOKENS
+
+    parts = model.split("-")
+    family = "-".join(parts[:-1]) if len(parts) > 1 else model
+
+    return _MODEL_DEFAULT_MAX_OUTPUT_TOKENS.get(family, _FALLBACK_MAX_OUTPUT_TOKENS)
+
+
 class AnthropicTool(TypedDict):
     """Anthropic tool definition."""

@@ -1205,7 +1231,7 @@ def get_weather(location: str) -> str:
     model: str = Field(alias="model_name")
     """Model name to use."""

-    max_tokens: int = Field(default=1024, alias="max_tokens_to_sample")
+    max_tokens: Optional[int] = Field(default=None, alias="max_tokens_to_sample")
     """Denotes the number of tokens to predict per generation."""

     temperature: Optional[float] = None
@@ -1343,6 +1369,15 @@ def _get_ls_params(
         ls_params["ls_stop"] = ls_stop
         return ls_params

+    @model_validator(mode="before")
+    @classmethod
+    def set_default_max_tokens(cls, values: dict[str, Any]) -> Any:
+        """Resolve a model-family default for ``max_tokens`` when none is given."""
+        if values.get("max_tokens") is None:
+            model = values.get("model") or values.get("model_name")
+            values["max_tokens"] = _default_max_tokens_for(model)
+        return values
+
     @model_validator(mode="before")
     @classmethod
     def build_extra(cls, values: dict) -> Any:
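
For quick orientation, here is a minimal standalone sketch of the family-matching rule the new helper applies. `TABLE` and `default_max_tokens` are illustrative stand-ins for the PR's `_MODEL_DEFAULT_MAX_OUTPUT_TOKENS` and `_default_max_tokens_for`; the logic mirrors the diff above.

```python
from typing import Final, Optional

# Illustrative copy of the PR's lookup table (see the diff above).
TABLE: Final[dict[str, int]] = {
    "claude-opus-4-1": 32000,
    "claude-opus-4": 32000,
    "claude-sonnet-4": 64000,
    "claude-3-7-sonnet": 64000,
    "claude-3-5-sonnet": 8192,
    "claude-3-5-haiku": 8192,
    "claude-3-haiku": 4096,
}


def default_max_tokens(model: Optional[str]) -> int:
    # Drop the last dash-separated segment (a date stamp or "latest")
    # to recover the family key, then fall back to 4096 when unknown.
    if not model:
        return 4096
    parts = model.split("-")
    family = "-".join(parts[:-1]) if len(parts) > 1 else model
    return TABLE.get(family, 4096)


assert default_max_tokens("claude-opus-4-20250514") == 32000    # family claude-opus-4
assert default_max_tokens("claude-opus-4-1-20250805") == 32000  # family claude-opus-4-1
assert default_max_tokens("claude-3-5-sonnet-latest") == 8192   # family claude-3-5-sonnet
assert default_max_tokens("claude-2.1") == 4096                 # no family match -> fallback
```

Because only the final segment is stripped, dated snapshots and `-latest` aliases resolve to the same family, while unrecognized names degrade to the conservative 4096 fallback.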
Binary file not shown.
@@ -901,7 +901,10 @@ class color_picker(BaseModel):

 @pytest.mark.vcr
 def test_web_search() -> None:
-    llm = ChatAnthropic(model="claude-3-5-sonnet-latest")  # type: ignore[call-arg]
+    llm = ChatAnthropic(
+        model="claude-3-5-sonnet-latest",  # type: ignore[call-arg]
+        max_tokens=1024,
+    )

     tool = {"type": "web_search_20250305", "name": "web_search", "max_uses": 1}
     llm_with_tools = llm.bind_tools([tool])
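
Pinning `max_tokens=1024` here is presumably what keeps the serialized request matching the previously recorded VCR cassette, since an unset value would now resolve to the 8192 family default for `claude-3-5-sonnet` (an inference; the PR does not state the reason). A short sketch of the two behaviors:

```python
from langchain_anthropic import ChatAnthropic

# Left unset, max_tokens now resolves from the family table (8192 here).
llm_default = ChatAnthropic(
    model="claude-3-5-sonnet-latest", anthropic_api_key="test"
)
assert llm_default.max_tokens == 8192

# An explicit value, as in the updated test, always wins.
llm_pinned = ChatAnthropic(
    model="claude-3-5-sonnet-latest", max_tokens=1024, anthropic_api_key="test"
)
assert llm_pinned.max_tokens == 1024
```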
libs/partners/anthropic/tests/unit_tests/test_chat_models.py (39 additions, 0 deletions)

@@ -111,6 +111,45 @@ def test_anthropic_proxy_from_environment() -> None:
     assert llm.anthropic_proxy == explicit_proxy


+def test_set_default_max_tokens() -> None:
+    """Test the set_default_max_tokens validator."""
+    # Test claude-opus-4 models
+    llm = ChatAnthropic(model="claude-opus-4-20250514", anthropic_api_key="test")
+    assert llm.max_tokens == 32000
+
+    # Test claude-sonnet-4 models
+    llm = ChatAnthropic(model="claude-sonnet-4-latest", anthropic_api_key="test")
+    assert llm.max_tokens == 64000
+
+    # Test claude-3-7-sonnet models
+    llm = ChatAnthropic(model="claude-3-7-sonnet-latest", anthropic_api_key="test")
+    assert llm.max_tokens == 64000
+
+    # Test claude-3-5-sonnet models
+    llm = ChatAnthropic(model="claude-3-5-sonnet-latest", anthropic_api_key="test")
+    assert llm.max_tokens == 8192
+
+    # Test claude-3-5-haiku models
+    llm = ChatAnthropic(model="claude-3-5-haiku-latest", anthropic_api_key="test")
+    assert llm.max_tokens == 8192
+
+    # Test claude-3-haiku models (family default is 4096)
+    llm = ChatAnthropic(model="claude-3-haiku-latest", anthropic_api_key="test")
+    assert llm.max_tokens == 4096
+
+    # Test that an explicitly set max_tokens value is preserved
+    llm = ChatAnthropic(
+        model="claude-3-5-sonnet-latest", max_tokens=2048, anthropic_api_key="test"
+    )
+    assert llm.max_tokens == 2048
+
+    # Test that an explicit value equal to the fallback (4096) is also preserved
+    llm = ChatAnthropic(
+        model="claude-3-5-sonnet-latest", max_tokens=4096, anthropic_api_key="test"
+    )
+    assert llm.max_tokens == 4096


 @pytest.mark.requires("anthropic")
 def test_anthropic_model_name_param() -> None:
     llm = ChatAnthropic(model_name="foo")  # type: ignore[call-arg]