Commit c5d1f84

[Opik-2168] Add cost tracking support for ChatBedrock and ChatBedrockConverse in langchain, expose bedrock as an officially supported provider (#2862)
* Draft
* Start using new usage extractors in OpikTracer
* Fix lint errors
* Fix lint errors
* Refactor usage extractors, add new mappers to LangChainUsage
* Simplified if statements in usage extractors
* Fix lint errors
* Add default values to langchain usage structure
* Change default values from int 0 to None
* Add map_to_groq_completions_usage
* Update langchain openai test to better reflect the expected usage structure
* Add docstrings to LLMUsageInfo model
* Expose bedrock provider in the LLMProvider enum as officially supported one
* Draft implementation
* Add streaming mode test
* Add fixture to ensure bedrock is configured, add more tests for async use case.
* Remove unnecessary comments
* Move boto3 import to ensure_aws_bedrock_configured fixture
* Rename tests
* Update env var name for aws region in langchain tests workflow file
* Propagate AWS creds from secrets to env vars
* assume correct role for bedrock
* skip tag session
* Update extractor logic, add support for ChatBedrockConverse
* Fix lint errors
* Update parameter settings for chat models in tests

---------

Co-authored-by: Alex Bezpalko <[email protected]>
1 parent 423f344 commit c5d1f84

12 files changed: +507 −12 lines

.github/workflows/lib-langchain-tests.yml

Lines changed: 14 additions & 1 deletion
@@ -12,6 +12,9 @@ env:
   GOOGLE_CLOUD_PROJECT: opik-sdk-tests
   GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
   ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+  AWS_DEFAULT_REGION: us-east-1
+  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
   OPIK_ENABLE_LITELLM_MODELS_MONITORING: False
   OPIK_SENTRY_ENABLE: False
 on:
@@ -52,7 +55,17 @@ jobs:
           cd ./tests
           pip install --no-cache-dir --disable-pip-version-check -r library_integration/langchain/requirements.txt

+      - name: change aws role
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          role-to-assume: ${{ vars.BEDROCK_ROLE }}
+          aws-region: us-east-1
+          role-chaining: true
+          role-skip-session-tagging: true
+
       - name: Run tests
         run: |
           cd ./tests/library_integration/langchain/
-          python -m pytest -vv .
+          python -m pytest -vv .
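With the new env block and role-assumption step, boto3 inside the test job can resolve credentials implicitly. A minimal local sanity check, as a sketch (not part of the commit; assumes boto3 is installed and the exported credentials are valid):

import boto3

# boto3 reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_DEFAULT_REGION
# from the environment, so no explicit credentials are passed here.
session = boto3.Session()
identity = session.client("sts").get_caller_identity()
print(identity["Arn"])  # after the configure-aws-credentials step, this reflects the assumed role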

sdks/python/src/opik/integrations/bedrock/converse_decorator.py

Lines changed: 2 additions & 1 deletion
@@ -2,6 +2,7 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
 from typing_extensions import override

+import opik
 import opik.dict_utils as dict_utils
 import opik.llm_usage as llm_usage
 from opik.api_objects import span
@@ -63,7 +64,7 @@ def _end_span_inputs_preprocessor(
 ) -> arguments_helpers.EndSpanParameters:
     usage = output["usage"]
     usage_in_openai_format = llm_usage.try_build_opik_usage_or_log_error(
-        provider="_bedrock",
+        provider=opik.LLMProvider.BEDROCK,
         usage=usage,
         logger=LOGGER,
         error_message="Failed to log token usage from bedrock LLM call",
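The behavioral change here is only the provider key: Bedrock Converse usage now routes through the official enum value instead of the private "_bedrock" string. A sketch of the call with a hypothetical usage payload (the real payload comes from output["usage"] as shown above):

import logging

import opik
import opik.llm_usage as llm_usage

LOGGER = logging.getLogger(__name__)

# Hypothetical Converse-style usage payload.
usage = {"inputTokens": 10, "outputTokens": 25, "totalTokens": 35}

usage_in_openai_format = llm_usage.try_build_opik_usage_or_log_error(
    provider=opik.LLMProvider.BEDROCK,
    usage=usage,
    logger=LOGGER,
    error_message="Failed to log token usage from bedrock LLM call",
)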
sdks/python/src/opik/integrations/langchain/provider_usage_extractors/bedrock_usage_extractor.py

Lines changed: 76 additions & 0 deletions

@@ -0,0 +1,76 @@
+import logging
+from typing import TYPE_CHECKING, Any, Dict, Optional, List
+
+import opik
+from opik import llm_usage
+from . import provider_usage_extractor_protocol
+from . import langchain_run_helpers
+
+if TYPE_CHECKING:
+    pass
+
+LOGGER = logging.getLogger(__name__)
+
+
+class BedrockUsageExtractor(
+    provider_usage_extractor_protocol.ProviderUsageExtractorProtocol
+):
+    PROVIDER = opik.LLMProvider.BEDROCK
+
+    def is_provider_run(self, run_dict: Dict[str, Any]) -> bool:
+        try:
+            if run_dict.get("serialized") is None:
+                return False
+
+            class_id: List[str] = run_dict.get("serialized", {}).get("id", [])
+            if len(class_id) == 0:
+                return False
+
+            class_name = class_id[-1]
+            is_bedrock = "ChatBedrock" in class_name
+
+            return is_bedrock
+
+        except Exception:
+            LOGGER.debug(
+                "Failed to check if Run instance is from Bedrock LLM, returning False.",
+                exc_info=True,
+            )
+            return False
+
+    def get_llm_usage_info(self, run_dict: Dict[str, Any]) -> llm_usage.LLMUsageInfo:
+        usage_dict = _try_get_token_usage(run_dict)
+        model = _try_get_model_name(run_dict)
+
+        return llm_usage.LLMUsageInfo(
+            provider=self.PROVIDER, model=model, usage=usage_dict
+        )
+
+
+def _try_get_token_usage(run_dict: Dict[str, Any]) -> Optional[llm_usage.OpikUsage]:
+    try:
+        langchain_usage = langchain_run_helpers.try_get_token_usage(run_dict)
+        bedrock_usage_dict = langchain_usage.map_to_bedrock_usage()
+
+        opik_usage = llm_usage.OpikUsage.from_bedrock_dict(bedrock_usage_dict)
+        return opik_usage
+    except Exception:
+        LOGGER.warning(
+            "Failed to extract token usage from presumably Bedrock LLM langchain run.",
+            exc_info=True,
+        )
+        return None
+
+
+def _try_get_model_name(run_dict: Dict[str, Any]) -> Optional[str]:
+    MODEL_NAME_KEY = "model_id"
+
+    model = run_dict.get("serialized", {}).get("kwargs", {}).get(MODEL_NAME_KEY, None)
+
+    if model is None:
+        LOGGER.error(
+            "Failed to extract model name from presumably Bedrock LLM langchain Run object: %s",
+            run_dict,
+        )
+
+    return model
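How the new extractor is expected to behave, as a sketch (the run_dict below is hypothetical; real ones are serialized LangChain Run objects, and the import path assumes the package layout shown in this diff):

import opik
from opik.integrations.langchain.provider_usage_extractors import (
    bedrock_usage_extractor,
)

run_dict = {
    "serialized": {
        # "ChatBedrock" is matched as a substring, so ChatBedrockConverse also qualifies.
        "id": ["langchain_aws", "chat_models", "ChatBedrockConverse"],
        "kwargs": {"model_id": "us.anthropic.claude-sonnet-4-20250514-v1:0"},
    },
}

extractor = bedrock_usage_extractor.BedrockUsageExtractor()
assert extractor.is_provider_run(run_dict)

# Without token usage fields this logs a warning and returns usage=None,
# but provider and model are still populated.
info = extractor.get_llm_usage_info(run_dict)
assert info.provider == opik.LLMProvider.BEDROCK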

sdks/python/src/opik/integrations/langchain/provider_usage_extractors/langchain_run_helpers/langchain_usage.py

Lines changed: 7 additions & 0 deletions
@@ -108,6 +108,13 @@ def map_to_anthropic_usage(self) -> Dict[str, Any]:

         return anthropic_usage

+    def map_to_bedrock_usage(self) -> Dict[str, Any]:
+        bedrock_usage: Dict[str, Any] = {
+            "inputTokens": self.input_tokens,
+            "outputTokens": self.output_tokens,
+        }
+        return bedrock_usage
+
     def map_to_openai_completions_usage(self) -> Dict[str, Any]:
         openai_usage: Dict[str, Any] = {
             "prompt_tokens": self.input_tokens,

sdks/python/src/opik/integrations/langchain/provider_usage_extractors/usage_extractor.py

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,7 @@
     vertexai_usage_extractor,
     groq_usage_extractor,
     anthropic_vertexai_usage_extractor,
+    bedrock_usage_extractor,
 )
 from . import provider_usage_extractor_protocol

@@ -23,6 +24,7 @@
     vertexai_usage_extractor.VertexAIUsageExtractor(),
     groq_usage_extractor.GroqUsageExtractor(),
     anthropic_vertexai_usage_extractor.AnthropicVertexAIUsageExtractor(),
+    bedrock_usage_extractor.BedrockUsageExtractor(),
 ]
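Registering the extractor is all the dispatch needs; presumably the surrounding code walks this list and uses the first extractor whose is_provider_run() accepts the run, along the lines of this sketch (the helper name is hypothetical):

def _find_extractor(run_dict, extractors):
    # First extractor that recognizes the run wins; None means
    # usage extraction is unsupported for this run's provider.
    for extractor in extractors:
        if extractor.is_provider_run(run_dict):
            return extractor
    return None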

sdks/python/src/opik/llm_usage/bedrock_usage.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@


 class BedrockUsage(base_original_provider_usage.BaseOriginalProviderUsage):
-    """Anthropic calls token usage data. Updated 11.03.2025"""
+    """Bedrock calls token usage data. Updated 11.03.2025"""

     inputTokens: int
     """The number of input tokens which were used."""

sdks/python/src/opik/llm_usage/opik_usage_factory.py

Lines changed: 2 additions & 4 deletions
@@ -5,9 +5,7 @@
 from . import opik_usage


-# One provider can have multiple formats of usage dicts, so it could be many build functions
-# if provider's name specified as string and not as LLMProvider enum value -
-# it means that we do not support cost tracking for this provider (but support usage info)
+# One provider can have multiple formats of usage dicts, so it can have more than 1 build function
 _PROVIDER_TO_OPIK_USAGE_BUILDERS: Dict[
     Union[str, LLMProvider],
     List[Callable[[Dict[str, Any]], opik_usage.OpikUsage]],
@@ -19,7 +17,7 @@
     LLMProvider.GOOGLE_VERTEXAI: [opik_usage.OpikUsage.from_google_dict],
     LLMProvider.GOOGLE_AI: [opik_usage.OpikUsage.from_google_dict],
     LLMProvider.ANTHROPIC: [opik_usage.OpikUsage.from_anthropic_dict],
-    "_bedrock": [opik_usage.OpikUsage.from_bedrock_dict],
+    LLMProvider.BEDROCK: [opik_usage.OpikUsage.from_bedrock_dict],
 }
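With the enum key, Bedrock usage dicts resolve through the same builder table as every other cost-tracked provider. A sketch of a direct build (token counts hypothetical; the two-key dict matches what map_to_bedrock_usage() produces):

import opik.llm_usage as llm_usage

bedrock_usage_dict = {"inputTokens": 10, "outputTokens": 25}
opik_usage = llm_usage.OpikUsage.from_bedrock_dict(bedrock_usage_dict)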

sdks/python/src/opik/types.py

Lines changed: 3 additions & 0 deletions
@@ -34,6 +34,9 @@ class LLMProvider(str, enum.Enum):
     GROQ = "groq"
     """Used for models hosted by Groq. https://groq.com"""

+    BEDROCK = "bedrock"
+    """Used for models hosted by AWS Bedrock. https://aws.amazon.com/bedrock"""
+
     @classmethod
     def has_value(cls, value: str) -> bool:
         return value in [enum_item.value for enum_item in cls]
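Downstream code can now reference the provider directly; both checks below follow from the enum definition in this diff:

import opik

assert opik.LLMProvider.BEDROCK.value == "bedrock"
assert opik.LLMProvider.has_value("bedrock")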

sdks/python/tests/conftest.py

Lines changed: 20 additions & 5 deletions
@@ -190,14 +190,14 @@ def temp_file_15mb():
     yield f


-@pytest.fixture()
+@pytest.fixture(scope="session")
 def ensure_openai_configured():
     # don't use assertion here to prevent printing os.environ with all env variables
     if not ("OPENAI_API_KEY" in os.environ and "OPENAI_ORG_ID" in os.environ):
         raise Exception("OpenAI not configured!")


-@pytest.fixture
+@pytest.fixture(scope="session")
 def ensure_google_project_and_location_configured():
     if not (
         "GOOGLE_CLOUD_PROJECT" in os.environ and "GOOGLE_CLOUD_LOCATION" in os.environ
@@ -207,15 +207,15 @@ def ensure_google_project_and_location_configured():
     )


-@pytest.fixture()
+@pytest.fixture(scope="session")
 def ensure_anthropic_configured():
     # don't use assertion here to prevent printing os.environ with all env variables

     if "ANTHROPIC_API_KEY" not in os.environ:
         raise Exception("Anthropic not configured!")


-@pytest.fixture
+@pytest.fixture(scope="session")
 def ensure_vertexai_configured(ensure_google_project_and_location_configured):
     GOOGLE_APPLICATION_CREDENTIALS_PATH = "gcp_credentials.json"

@@ -246,8 +246,23 @@ def ensure_vertexai_configured(ensure_google_project_and_location_configured):
     os.remove(GOOGLE_APPLICATION_CREDENTIALS_PATH)


-@pytest.fixture()
+@pytest.fixture(scope="session")
 def ensure_google_api_configured():
     GOOGLE_API_KEY = "GOOGLE_API_KEY"
     if GOOGLE_API_KEY not in os.environ:
         raise Exception(f"{GOOGLE_API_KEY} env var must be set")
+
+
+@pytest.fixture(scope="session")
+def ensure_aws_bedrock_configured():
+    import boto3
+
+    session = boto3.Session()
+
+    bedrock_client = session.client(service_name="bedrock")
+    try:
+        available_models = bedrock_client.list_foundation_models()["modelSummaries"]
+        if not available_models:
+            raise Exception("AWS Bedrock not configured! No models available")
+    except Exception as e:
+        raise Exception(f"AWS Bedrock not configured! {e}")

sdks/python/tests/library_integration/langchain/constants.py

Lines changed: 10 additions & 0 deletions
@@ -27,3 +27,13 @@
     "original_usage.prompt_tokens_details.audio_tokens": ANY_BUT_NONE,
     "original_usage.prompt_tokens_details.cached_tokens": ANY_BUT_NONE,
 }
+
+BEDROCK_MODEL_FOR_TESTS = "us.anthropic.claude-sonnet-4-20250514-v1:0"
+
+EXPECTED_BEDROCK_USAGE_LOGGED_FORMAT = {
+    "prompt_tokens": ANY_BUT_NONE,
+    "completion_tokens": ANY_BUT_NONE,
+    "total_tokens": ANY_BUT_NONE,
+    "original_usage.inputTokens": ANY_BUT_NONE,
+    "original_usage.outputTokens": ANY_BUT_NONE,
+}
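The expected-format mapping pairs the OpenAI-style totals Opik logs with the provider's original keys under the original_usage. prefix. A sketch of the check a test would presumably run against a logged span (values hypothetical; assumes the test directory is on the import path):

from constants import EXPECTED_BEDROCK_USAGE_LOGGED_FORMAT

logged_usage = {
    "prompt_tokens": 12,
    "completion_tokens": 34,
    "total_tokens": 46,
    "original_usage.inputTokens": 12,
    "original_usage.outputTokens": 34,
}

# Every expected key must be present and non-None in the logged usage.
assert all(logged_usage.get(key) is not None for key in EXPECTED_BEDROCK_USAGE_LOGGED_FORMAT)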
