Skip to content

Commit 154a85b

Browse files
[GSoC] New Interface report_metrics in Python SDK (#2371)
* chore: add report_metrics. Signed-off-by: Electronic-Waste <[email protected]> * fix: modify the code according to the first review. Signed-off-by: Electronic-Waste <[email protected]> * chore: add validation for metrics value & rename katib_report_metrics.py to report_metrics.py. Signed-off-by: Electronic-Waste <[email protected]> * fix: update import path in __init__.py. Signed-off-by: Electronic-Waste <[email protected]> * fix: delete blank line. Signed-off-by: Electronic-Waste <[email protected]> * fix: update RuntimeError doc string & correct spelling error & add new line. Signed-off-by: Electronic-Waste <[email protected]> * fix: delete blank in the last line. Signed-off-by: Electronic-Waste <[email protected]> --------- Signed-off-by: Electronic-Waste <[email protected]>
1 parent f06906d commit 154a85b

File tree

4 files changed

+101
-1
lines changed

4 files changed

+101
-1
lines changed

sdk/python/v1beta1/kubeflow/katib/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@
7171

7272
# Import Katib API client.
7373
from kubeflow.katib.api.katib_client import KatibClient
74+
# Import Katib report metrics function.
75+
from kubeflow.katib.api.report_metrics import report_metrics
7476
# Import Katib helper functions.
7577
import kubeflow.katib.api.search as search
7678
# Import Katib helper constants.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright 2024 The Kubeflow Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
from datetime import datetime, timezone
17+
from typing import Any, Dict
18+
19+
import grpc
20+
import kubeflow.katib.katib_api_pb2 as katib_api_pb2
21+
from kubeflow.katib.constants import constants
22+
from kubeflow.katib.utils import utils
23+
24+
def report_metrics(
    metrics: Dict[str, Any],
    db_manager_address: str = constants.DEFAULT_DB_MANAGER_ADDRESS,
    timeout: int = constants.DEFAULT_TIMEOUT,
):
    """Push Metrics Directly to Katib DB

    Katib always passes Trial name as env variable `KATIB_TRIAL_NAME` to the training container.

    Args:
        metrics: Dict of metrics pushed to Katib DB.
            For example, `metrics = {"loss": 0.01, "accuracy": 0.99}`.
        db_manager_address: Address for the Katib DB Manager in this format: `ip-address:port`.
        timeout: Optional, gRPC API Server timeout in seconds to report metrics.

    Raises:
        ValueError: The Trial name is not passed to environment variables,
            a metrics value has incorrect format (cannot be converted to type `float`),
            or `db_manager_address` does not contain a `:` separated port.
        RuntimeError: Unable to push Trial metrics to Katib DB.
    """

    # Get Trial's namespace and name
    namespace = utils.get_current_k8s_namespace()
    trial_name = os.getenv("KATIB_TRIAL_NAME")
    if trial_name is None:
        raise ValueError("The Trial name is not passed to environment variables")

    # Validate metrics values before any gRPC resource is created, so invalid
    # input fails fast without a channel having been opened.
    for value in metrics.values():
        utils.validate_metrics_value(value)

    # Get channel for grpc call to db manager.
    # Split on the last ":" only, so the host part may itself contain colons,
    # and keep the caller-visible parameter untouched.
    host, separator, port = db_manager_address.rpartition(":")
    if not separator:
        raise ValueError(
            f"Invalid DB Manager address {db_manager_address!r}. "
            "Expected format: `ip-address:port`"
        )
    channel = grpc.beta.implementations.insecure_channel(host, int(port))

    # Dial katib db manager to report metrics
    with katib_api_pb2.beta_create_DBManager_stub(channel) as client:
        try:
            # All metrics of one call share a single UTC timestamp.
            timestamp = datetime.now(timezone.utc).strftime(constants.RFC3339_FORMAT)
            client.ReportObservationLog(
                request=katib_api_pb2.ReportObservationLogRequest(
                    trial_name=trial_name,
                    observation_logs=katib_api_pb2.ObservationLog(
                        metric_logs=[
                            katib_api_pb2.MetricLog(
                                time_stamp=timestamp,
                                metric=katib_api_pb2.Metric(
                                    name=metric_name, value=str(value)
                                ),
                            )
                            for metric_name, value in metrics.items()
                        ]
                    ),
                ),
                timeout=timeout,
            )
        except Exception as e:
            # Chain explicitly so the original gRPC failure stays attached
            # as the cause of the RuntimeError.
            raise RuntimeError(
                f"Unable to push metrics to Katib DB for Trial {namespace}/{trial_name}. Exception: {e}"
            ) from e

sdk/python/v1beta1/kubeflow/katib/constants/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
# How long to wait in seconds for requests to the Kubernetes or gRPC API Server.
1818
DEFAULT_TIMEOUT = 120
1919

20+
# RFC3339 time format
21+
RFC3339_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
22+
2023
# Global CRD version
2124
KATIB_VERSION = os.environ.get("EXPERIMENT_VERSION", "v1beta1")
2225

sdk/python/v1beta1/kubeflow/katib/utils/utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import json
1616
import os
1717
import textwrap
18-
from typing import Callable
18+
from typing import Callable, Any
1919
import inspect
2020

2121
from kubeflow.katib import models
@@ -72,6 +72,16 @@ def print_experiment_status(experiment: models.V1beta1Experiment):
7272
print(f"Current Optimal Trial:\n {experiment.status.current_optimal_trial}")
7373
print(f"Experiment conditions:\n {experiment.status.conditions}")
7474

75+
def validate_metrics_value(value: Any):
    """Check that a metrics value is accepted by `float()`.

    Args:
        value: Candidate metrics value of any type.

    Raises:
        ValueError: Calling `float(value)` raised an exception.
    """
    try:
        float(value)
    except Exception:
        # Any conversion failure is reported uniformly as a ValueError.
        message = (
            f"Invalid value {value} for metrics value. "
            "The metrics value should have or can be converted to type `float`. "
        )
        raise ValueError(message)
    else:
        return None
84+
7585

7686
def validate_objective_function(objective: Callable):
7787

0 commit comments

Comments
 (0)