Skip to content

Commit 83ec74e

Browse files
committed
Consolidate the katib-cert-generator to the katib-controller
Signed-off-by: Yuki Iwai <[email protected]>
1 parent e69235d commit 83ec74e

File tree

44 files changed

+773
-757
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+773
-757
lines changed

.github/workflows/publish-core-images.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ jobs:
2626
dockerfile: cmd/db-manager/v1beta1/Dockerfile
2727
- component-name: katib-ui
2828
dockerfile: cmd/ui/v1beta1/Dockerfile
29-
- component-name: cert-generator
30-
dockerfile: cmd/cert-generator/v1beta1/Dockerfile
3129
- component-name: file-metrics-collector
3230
dockerfile: cmd/metricscollector/v1beta1/file-metricscollector/Dockerfile
3331
- component-name: tfevent-metrics-collector

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ Make sure that all Katib components are running:
179179
$ kubectl get pods -n kubeflow
180180
181181
NAME READY STATUS RESTARTS AGE
182-
katib-cert-generator-rw95w 0/1 Completed 0 35s
183182
katib-controller-566595bdd8-hbxgf 1/1 Running 0 36s
184183
katib-db-manager-57cd769cdb-4g99m 1/1 Running 0 36s
185184
katib-mysql-7894994f88-5d4s5 1/1 Running 0 36s

cmd/cert-generator/v1beta1/Dockerfile

Lines changed: 0 additions & 25 deletions
This file was deleted.

cmd/cert-generator/v1beta1/main.go

Lines changed: 0 additions & 42 deletions
This file was deleted.

cmd/katib-controller/v1beta1/main.go

Lines changed: 49 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,23 @@ import (
3333
"sigs.k8s.io/controller-runtime/pkg/log/zap"
3434
"sigs.k8s.io/controller-runtime/pkg/manager"
3535
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
36+
"sigs.k8s.io/controller-runtime/pkg/webhook"
3637

3738
configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
3839
apis "github.com/kubeflow/katib/pkg/apis/controller"
40+
cert "github.com/kubeflow/katib/pkg/cert-generator/v1beta1"
3941
"github.com/kubeflow/katib/pkg/controller.v1beta1"
4042
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
4143
"github.com/kubeflow/katib/pkg/util/v1beta1/katibconfig"
42-
webhook "github.com/kubeflow/katib/pkg/webhook/v1beta1"
44+
webhookv1beta1 "github.com/kubeflow/katib/pkg/webhook/v1beta1"
4345
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
4446
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
4547
)
4648

47-
var scheme = runtime.NewScheme()
49+
var (
50+
scheme = runtime.NewScheme()
51+
log = logf.Log.WithName("entrypoint")
52+
)
4853

4954
func init() {
5055
utilruntime.Must(apis.AddToScheme(scheme))
@@ -54,15 +59,12 @@ func init() {
5459

5560
func main() {
5661
logf.SetLogger(zap.New())
57-
log := logf.Log.WithName("entrypoint")
5862

5963
var katibConfigFile string
6064
flag.StringVar(&katibConfigFile, "katib-config", "",
6165
"The katib-controller will load its initial configuration from this file. "+
6266
"Omit this flag to use the default configuration values. ")
6367

64-
// TODO (andreyvelich): Currently it is not possible to set different webhook service name.
65-
// flag.StringVar(&serviceName, "webhook-service-name", "katib-controller", "The service name which will be used in webhook")
6668
// TODO (andreyvelich): Currently it is not possible to store webhook cert in the local file system.
6769
// flag.BoolVar(&certLocalFS, "cert-localfs", false, "Store the webhook cert in local file system")
6870

@@ -127,36 +129,61 @@ func main() {
127129
os.Exit(1)
128130
}
129131

130-
log.Info("Registering Components.")
132+
// Create a webhook server.
133+
hookServer := webhook.NewServer(webhook.Options{
134+
Port: *initConfig.ControllerConfig.WebhookPort,
135+
CertDir: consts.CertDir,
136+
})
131137

132-
// Setup all Controllers
133-
log.Info("Setting up controller.")
134-
if err := controller.AddToManager(mgr); err != nil {
135-
log.Error(err, "Unable to register controllers to the manager")
136-
os.Exit(1)
137-
}
138+
ctx := signals.SetupSignalHandler()
139+
certsReady := make(chan struct{})
138140

139-
log.Info("Setting up webhooks.")
140-
if err := webhook.AddToManager(mgr, *initConfig.ControllerConfig.WebhookPort); err != nil {
141-
log.Error(err, "Unable to register webhooks to the manager")
142-
os.Exit(1)
141+
if initConfig.CertGeneratorConfig.Enable {
142+
if err = cert.AddToManager(mgr, initConfig.CertGeneratorConfig, certsReady); err != nil {
143+
log.Error(err, "Failed to set up cert-generator")
144+
}
145+
} else {
146+
close(certsReady)
143147
}
144148

149+
// The setupControllers will register controllers to the manager
150+
// after the certs for the admission webhooks have been generated.
151+
go setupControllers(mgr, certsReady, hookServer)
152+
145153
log.Info("Setting up health checker.")
146-
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
147-
log.Error(err, "Unable to add healthz endpoint to the manager")
148-
os.Exit(1)
149-
}
150154
// TODO (@anencore94): Need a more detailed check of whether it is possible to communicate with k8s-apiserver or db-manager at '/readyz'.
151155
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
152156
log.Error(err, "Unable to add readyz endpoint to the manager")
153157
os.Exit(1)
154158
}
159+
if err = mgr.AddHealthzCheck("healthz", hookServer.StartedChecker()); err != nil {
160+
log.Error(err, "Add webhook server health checker to the manager failed")
161+
os.Exit(1)
162+
}
155163

156164
// Start the Cmd
157-
log.Info("Starting the Cmd.")
158-
if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
165+
log.Info("Starting the manager.")
166+
if err = mgr.Start(ctx); err != nil {
159167
log.Error(err, "Unable to run the manager")
160168
os.Exit(1)
161169
}
162170
}
171+
172+
func setupControllers(mgr manager.Manager, certsReady chan struct{}, hookServer webhook.Server) {
173+
// Receiving from certsReady blocks controller registration until the certs have been generated.
174+
<-certsReady
175+
log.Info("Certs ready")
176+
177+
// Setup all Controllers
178+
log.Info("Setting up controller.")
179+
if err := controller.AddToManager(mgr); err != nil {
180+
log.Error(err, "Unable to register controllers to the manager")
181+
os.Exit(1)
182+
}
183+
184+
log.Info("Setting up webhooks.")
185+
if err := webhookv1beta1.AddToManager(mgr, hookServer); err != nil {
186+
log.Error(err, "Unable to register webhooks to the manager")
187+
os.Exit(1)
188+
}
189+
}

docs/developer-guide.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,23 +100,23 @@ plane CIDR source range to use the Katib webhooks
100100

101101
### Katib cert generator
102102

103-
Katib uses the custom `cert-generator` [Kubernetes Job](https://kubernetes.io/docs/concepts/workloads/controllers/job/)
104-
to generate certificates for the webhooks.
103+
Katib Controller has the internal `cert-generator` to generate certificates for the webhooks.
105104

106-
Once Katib is deployed in the Kubernetes cluster, the `cert-generator` Job follows these steps:
105+
Once Katib is deployed in the Kubernetes cluster, the `cert-generator` follows these steps:
107106

108107
- Generate the self-signed certificate and private key.
109108

110109
- Create a Kubernetes Secret with the self-signed TLS certificate and private key.
111-
Secret has the `katib-webhook-cert` name and `cert-generator` Job's
110+
The Secret is named `katib-webhook-cert` and has the `cert-generator` controller Deployment's
112111
`ownerReference` to clean-up resources once Katib is uninstalled.
113112

114-
Once Secret is created, the Katib controller Deployment spawns the Pod,
115-
since the controller has the `katib-webhook-cert` Secret volume.
113+
- Save the self-signed TLS certificate and private key to the local path (`/tmp/cert`).
116114

117115
- Patch the webhooks with the `CABundle`.
118116

119-
You can find the `cert-generator` source code [here](../cmd/cert-generator/v1beta1).
117+
Once the `cert-generator` has finished, the Katib controller starts registering controllers such as `experiment-controller` with the manager.
118+
119+
You can find the `cert-generator` source code [here](../pkg/cert-generator/v1beta1).
120120

121121
## Implement a new algorithm and use it in Katib
122122

docs/images-location.md

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,6 @@ The following table shows images for the
6464
<a href="https://github.com/docker-library/mysql/blob/c506174eab8ae160f56483e8d72410f8f1e1470f/8.0/Dockerfile.debian">Dockerfile</a>
6565
</td>
6666
</tr>
67-
<tr align="center">
68-
<td>
69-
<code>docker.io/kubeflowkatib/cert-generator</code>
70-
</td>
71-
<td>
72-
Katib Cert Generator
73-
</td>
74-
<td>
75-
<a href="https://github.com/kubeflow/katib/blob/master/cmd/cert-generator/v1beta1/Dockerfile">Dockerfile</a>
76-
</td>
77-
</tr>
7867
</tbody>
7968
</table>
8069

examples/v1beta1/argo/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ Check that Katib Controller's pod was restarted:
9696
$ kubectl get pods -n kubeflow
9797
9898
NAME READY STATUS RESTARTS AGE
99-
katib-cert-generator-hnv6q 0/1 Completed 0 6m12s
10099
katib-controller-784994d449-9bgj9 1/1 Running 0 28s
101100
katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s
102101
katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s

examples/v1beta1/kind-cluster/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ If the above script was successful, Katib components will be running:
2727
$ kubectl get pods -n kubeflow
2828
2929
NAME READY STATUS RESTARTS AGE
30-
katib-cert-generator-tc2jt 0/1 Completed 0 67s
3130
katib-controller-566595bdd8-x7z6w 1/1 Running 0 67s
3231
katib-db-manager-57cd769cdb-x4lnz 1/1 Running 0 67s
3332
katib-mysql-7894994f88-7l8nd 1/1 Running 0 67s

examples/v1beta1/tekton/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ Check that Katib Controller's pod was restarted:
101101
$ kubectl get pods -n kubeflow
102102
103103
NAME READY STATUS RESTARTS AGE
104-
katib-cert-generator-hnv6q 0/1 Completed 0 6m12s
105104
katib-controller-784994d449-9bgj9 1/1 Running 0 28s
106105
katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s
107106
katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s

0 commit comments

Comments
 (0)