[SDK] Enable resource specification for trial containers

droctothorpe · shipengcheng1230 · droctothorpe · commit abd614d711e3 · 2023-08-03T17:00:58.000-04:00
Co-authored-by: shipengcheng1230 <shipengcheng1230@gmail.com>
diff --git a/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py b/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py
@@ -16,7 +16,7 @@
 import multiprocessing
 import textwrap
 import time
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
 
 import grpc
 import kubeflow.katib.katib_api_pb2 as katib_api_pb2
@@ -147,6 +147,7 @@ def tune(
         retain_trials: bool = False,
         packages_to_install: List[str] = None,
         pip_index_url: str = "https://pypi.org/simple",
+        resources_per_trial: Union[dict, client.V1ResourceRequirements, None] = None,
     ):
         """Create HyperParameter Tuning Katib Experiment from the objective function.
 
@@ -182,6 +183,20 @@ def tune(
                 to the base image packages. These packages are installed before
                 executing the objective function.
             pip_index_url: The PyPI url from which to install Python packages.
+            resources_per_trial: A parameter that lets you specify how many
+            resources each trial container should have. You can either specify a
+            kubernetes.client.V1ResourceRequirements object (documented here:
+            https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1ResourceRequirements.md)
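+            or a dictionary that includes one or more of the following keys:
+            `cpu`, `memory`, or `gpu` (`gpu` is mapped to `nvidia.com/gpu`;
+            any other keys are passed through unchanged). When a dictionary is
+            given, `cpu` defaults to `200m` and `memory` to `256Mi`, and the
+            same values are used for both requests and limits. Appropriate
+            values for these keys are documented here:
+            https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/.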
+            For example:
+                {
+                    "cpu": "1",
+                    "memory": "2Gi",
+                }
+            This parameter is optional and defaults to None.
 
         Raises:
             ValueError: Objective function has invalid arguments.
@@ -280,6 +295,23 @@ def tune(
                 + exec_script
             )
 
+        resources = client.V1ResourceRequirements()
+        if isinstance(resources_per_trial, dict):
+            requests = {
+                "cpu": "200m",
+                "memory": "256Mi",
+            }
+            if "gpu" in resources_per_trial:
+                resources_per_trial["nvidia.com/gpu"] = resources_per_trial.pop("gpu")
+            requests.update(resources_per_trial)
+
+            resources = client.V1ResourceRequirements(
+                requests=requests,
+                limits=requests,
+            )
+        else:
+            resources = resources_per_trial
+
         # Create Trial specification.
         trial_spec = client.V1Job(
             api_version="batch/v1",
@@ -297,6 +329,7 @@ def tune(
                                 image=base_image,
                                 command=["bash", "-c"],
                                 args=[exec_script],
+                                resources=resources,
                             )
                         ],
                     ),