Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions manifests/v1beta1/components/katib-controller/application.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Application resource describing the katib-controller component: the
# resource kinds it is composed of, human-readable descriptor metadata, and
# the label selector that identifies its member resources.
apiVersion: app.k8s.io/v1beta1
kind: Application
metadata:
  name: katib-controller
spec:
  addOwnerRef: true
  # Resource kinds that make up this application.
  componentKinds:
    - group: core
      kind: Service
    - group: apps
      kind: Deployment
    - group: core
      kind: Secret
    - group: core
      kind: ServiceAccount
    - group: kubeflow.org
      kind: Experiment
    - group: kubeflow.org
      kind: Suggestion
    - group: kubeflow.org
      kind: Trial
  descriptor:
    description: >-
      Katib is a service for hyperparameter tuning and neural architecture
      search.
    keywords:
      - katib
      - katib-controller
      - hyperparameter tuning
    links:
      - description: About
        url: https://github.com/kubeflow/katib
    # NOTE(review): every address below reads "[email protected]", which looks
    # like an obfuscation placeholder rather than a real address - restore the
    # real addresses if they are available. The values are quoted because an
    # unquoted scalar that starts with "[" is parsed as a flow sequence, not
    # as a string.
    maintainers:
      - email: '[email protected]'
        name: Ce Gao
      - email: '[email protected]'
        name: Johnu George
      - email: '[email protected]'
        name: Hougang Liu
      - email: '[email protected]'
        name: Richard Liu
      - email: '[email protected]'
        name: YujiOshima
      - email: '[email protected]'
        name: Andrey Velichkevich
    owners:
      - email: '[email protected]'
        name: Ce Gao
      - email: '[email protected]'
        name: Johnu George
      - email: '[email protected]'
        name: Hougang Liu
      - email: '[email protected]'
        name: Richard Liu
      - email: '[email protected]'
        name: YujiOshima
      - email: '[email protected]'
        name: Andrey Velichkevich
    type: katib
    version: v1beta1
  # Resources carrying all of these labels are treated as part of the
  # application.
  selector:
    matchLabels:
      app.kubernetes.io/component: katib
      app.kubernetes.io/instance: katib-controller
      app.kubernetes.io/managed-by: kfctl
      app.kubernetes.io/name: katib-controller
      app.kubernetes.io/part-of: kubeflow
62 changes: 62 additions & 0 deletions manifests/v1beta1/components/katib-controller/katib-configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# ConfigMap "katib-config": three JSON documents, each stored as a literal
# block scalar (|- keeps line breaks and strips the trailing newline).
# NOTE(review): presumably read by katib-controller at runtime - confirm.
apiVersion: v1
kind: ConfigMap
metadata:
  name: katib-config
data:
  # Keyed by metrics-collector kind. "StdOut" and "File" use the same image;
  # "TensorFlowEvent" additionally sets a 1Gi memory limit.
  metrics-collector-sidecar: |-
    {
      "StdOut": {
        "image": "docker.io/kubeflowkatib/file-metrics-collector:v1beta1-a96ff59"
      },
      "File": {
        "image": "docker.io/kubeflowkatib/file-metrics-collector:v1beta1-a96ff59"
      },
      "TensorFlowEvent": {
        "image": "docker.io/kubeflowkatib/tfevent-metrics-collector:v1beta1-a96ff59",
        "resources": {
          "limits": {
            "memory": "1Gi"
          }
        }
      }
    }
  # Keyed by suggestion algorithm name. "random" and "tpe" share the hyperopt
  # image; "enas" also sets imagePullPolicy and a 200Mi memory limit.
  suggestion: |-
    {
      "random": {
        "image": "docker.io/kubeflowkatib/suggestion-hyperopt:v1beta1-a96ff59"
      },
      "grid": {
        "image": "docker.io/kubeflowkatib/suggestion-chocolate:v1beta1-a96ff59"
      },
      "hyperband": {
        "image": "docker.io/kubeflowkatib/suggestion-hyperband:v1beta1-a96ff59"
      },
      "bayesianoptimization": {
        "image": "docker.io/kubeflowkatib/suggestion-skopt:v1beta1-a96ff59"
      },
      "tpe": {
        "image": "docker.io/kubeflowkatib/suggestion-hyperopt:v1beta1-a96ff59"
      },
      "enas": {
        "image": "docker.io/kubeflowkatib/suggestion-enas:v1beta1-a96ff59",
        "imagePullPolicy": "Always",
        "resources": {
          "limits": {
            "memory": "200Mi"
          }
        }
      },
      "cmaes": {
        "image": "docker.io/kubeflowkatib/suggestion-goptuna:v1beta1-a96ff59"
      },
      "darts": {
        "image": "docker.io/kubeflowkatib/suggestion-darts:v1beta1-a96ff59"
      }
    }
  # Keyed by early-stopping algorithm name.
  early-stopping: |-
    {
      "medianstop": {
        "image": "docker.io/kubeflowkatib/earlystopping-medianstop:v1beta1-a96ff59",
        "imagePullPolicy": "Always"
      }
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Deployment running a single katib-controller pod. The pod serves a webhook
# on 8443 and metrics on 8080, and mounts the "katib-controller" Secret
# read-only at /tmp/cert.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: katib-controller
  labels:
    app: katib-controller
spec:
  replicas: 1
  selector:
    matchLabels:
      app: katib-controller
  template:
    metadata:
      labels:
        app: katib-controller
      annotations:
        sidecar.istio.io/inject: "false"
        prometheus.io/scrape: "true"
        # Pin scraping to the "metrics" container port. Without an explicit
        # port, annotation-based discovery may also target the 8443 webhook
        # port, which does not serve metrics. Matches the Service annotation.
        prometheus.io/port: "8080"
    spec:
      serviceAccountName: katib-controller
      containers:
        - name: katib-controller
          # NOTE(review): no tag or digest is given here - presumably set by
          # an image override at build time; confirm, otherwise this resolves
          # to ":latest" while imagePullPolicy is IfNotPresent.
          image: docker.io/kubeflowkatib/katib-controller
          imagePullPolicy: IfNotPresent
          command: ["./katib-controller"]
          args:
            - "--webhook-port=8443"
            # One flag per resource kind accepted as a trial resource,
            # written as Kind.version.group.
            - "--trial-resources=Job.v1.batch"
            - "--trial-resources=TFJob.v1.kubeflow.org"
            - "--trial-resources=PyTorchJob.v1.kubeflow.org"
            - "--trial-resources=MPIJob.v1.kubeflow.org"
            - "--trial-resources=PipelineRun.v1beta1.tekton.dev"
          ports:
            - containerPort: 8443
              name: webhook
              protocol: TCP
            - containerPort: 8080
              name: metrics
              protocol: TCP
          env:
            # Downward API: exposes the pod's own namespace to the process.
            - name: KATIB_CORE_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          volumeMounts:
            - mountPath: /tmp/cert
              name: cert
              readOnly: true
      volumes:
        - name: cert
          secret:
            # 420 decimal == 0644 octal.
            defaultMode: 420
            secretName: katib-controller
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Cluster-wide permissions bound to the katib-controller ServiceAccount.
# NOTE(review): most rules grant every verb ("*"), including on secrets,
# roles and rolebindings - confirm the controller needs this breadth.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: katib-controller
rules:
  # Core API group: configuration, identity, storage and namespace objects.
  - apiGroups: [""]
    resources:
      - configmaps
      - serviceaccounts
      - services
      - secrets
      - events
      - namespaces
      - persistentvolumes
      - persistentvolumeclaims
    verbs: ["*"]
  # Core API group: pods plus their log and status subresources.
  - apiGroups: [""]
    resources:
      - pods
      - pods/log
      - pods/status
    verbs: ["*"]
  - apiGroups: [apps]
    resources:
      - deployments
    verbs: ["*"]
  - apiGroups: [batch]
    resources:
      - jobs
      - cronjobs
    verbs: ["*"]
  # CRDs: only create and get are granted.
  - apiGroups: [apiextensions.k8s.io]
    resources:
      - customresourcedefinitions
    verbs: [create, get]
  - apiGroups: [admissionregistration.k8s.io]
    resources:
      - validatingwebhookconfigurations
      - mutatingwebhookconfigurations
    verbs: ["*"]
  # Katib custom resources with their status and finalizers subresources.
  - apiGroups: [kubeflow.org]
    resources:
      - experiments
      - experiments/status
      - experiments/finalizers
      - trials
      - trials/status
      - trials/finalizers
      - suggestions
      - suggestions/status
      - suggestions/finalizers
    verbs: ["*"]
  # Training job kinds in the kubeflow.org group.
  - apiGroups: [kubeflow.org]
    resources:
      - tfjobs
      - pytorchjobs
      - mpijobs
    verbs: ["*"]
  - apiGroups: [tekton.dev]
    resources:
      - pipelineruns
      - taskruns
    verbs: ["*"]
  - apiGroups: [rbac.authorization.k8s.io]
    resources:
      - roles
      - rolebindings
    verbs: ["*"]
---
# ServiceAccount named "katib-controller"; no namespace is set here.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: katib-controller
---
# Grants the "katib-controller" ClusterRole to the ServiceAccount of the
# same name.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: katib-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: katib-controller
subjects:
  # NOTE(review): no namespace is given for this subject - presumably filled
  # in by the build tooling before the manifest is applied; confirm.
  - kind: ServiceAccount
    name: katib-controller

---
# Aggregated ClusterRole: declares no rules of its own and collects them from
# every ClusterRole labelled aggregate-to-kubeflow-katib-admin. Its own label
# in turn marks it for aggregation into kubeflow-admin.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-katib-admin
  labels:
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
aggregationRule:
  clusterRoleSelectors:
    - matchLabels:
        rbac.authorization.kubeflow.org/aggregate-to-kubeflow-katib-admin: "true"
rules: []

---
# Read/write access to Katib experiments, trials and suggestions. The labels
# mark this role for aggregation into kubeflow-edit and kubeflow-katib-admin.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-katib-edit
  labels:
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-katib-admin: "true"
rules:
  - apiGroups: [kubeflow.org]
    resources:
      - experiments
      - trials
      - suggestions
    verbs:
      - get
      - list
      - watch
      - create
      - delete
      - deletecollection
      - patch
      - update

---
# Read-only access to Katib experiments, trials and suggestions. The label
# marks this role for aggregation into kubeflow-view.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-katib-view
  labels:
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
rules:
  - apiGroups: [kubeflow.org]
    resources:
      - experiments
      - trials
      - suggestions
    verbs: [get, list, watch]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Secret "katib-controller", declared without any data.
# NOTE(review): the katib-controller Deployment mounts a secret of this name
# at /tmp/cert, so the contents are presumably written at runtime - confirm.
apiVersion: v1
kind: Secret
metadata:
  name: katib-controller
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Service in front of pods labelled app=katib-controller: port 443 forwards
# to the pod's 8443 (webhook) and port 8080 forwards to 8080 (metrics).
apiVersion: v1
kind: Service
metadata:
  name: katib-controller
  annotations:
    # Scrape hints: metrics are served over plain HTTP on port 8080.
    prometheus.io/port: "8080"
    prometheus.io/scheme: http
    prometheus.io/scrape: "true"
spec:
  selector:
    app: katib-controller
  ports:
    - name: webhook
      port: 443
      protocol: TCP
      targetPort: 8443
    - name: metrics
      port: 8080
      targetPort: 8080
Loading