Skip to content

Commit e95f4f6

Browse files
committed
E2E: Add additional check to verify if the components are ready
Signed-off-by: Yuki Iwai <[email protected]>
1 parent 888bec3 commit e95f4f6

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,36 @@ echo "Deploying Katib"
6565
# Run `make deploy` from five directories above the current one, passing
# WITH_DATABASE_TYPE through to make, then `cd -` back to the previous directory.
cd ../../../../../ && WITH_DATABASE_TYPE=$WITH_DATABASE_TYPE make deploy && cd -
6666

6767
# Wait until all Katib pods are running.
68+
# Poll until the label selector matches exactly four Katib pods: the database
# named by WITH_DATABASE_TYPE, the controller, the UI and the db-manager.
# Gives up (exit 1) after MAX_RETRY_COUNT polls spaced 2 seconds apart.
MAX_RETRY_COUNT=10
# Compare numerically: some `wc` implementations pad the count with spaces,
# which would make a string comparison against "4" fail forever.
until [ "$(kubectl get pods -n kubeflow -l "katib.kubeflow.org/component in (${WITH_DATABASE_TYPE},controller,ui,db-manager)" -oname | wc -l)" -eq 4 ]; do
  echo "Waiting for katib components to be created"
  # Best-effort status dump for the CI log; a failure here must not abort the wait.
  kubectl get pods -n kubeflow || :

  sleep 2
  MAX_RETRY_COUNT=$((MAX_RETRY_COUNT - 1))
  if [ "${MAX_RETRY_COUNT}" -le 0 ]; then
    echo "Failed to create katib components" >&2
    exit 1
  fi
done
80+
6881
# Block until every selected Katib pod reports the `ready` condition, or until
# TIMEOUT elapses. On failure, dump pod listings and descriptions, then abort.
TIMEOUT=120s
if ! kubectl wait --for=condition=ready --timeout="${TIMEOUT}" -l "katib.kubeflow.org/component in (${WITH_DATABASE_TYPE},controller,db-manager,ui)" -n kubeflow pod; then
  # Diagnostics are best-effort: run both even if one of them fails.
  kubectl get pods -n kubeflow || :
  kubectl describe pods -n kubeflow || :
  # Exit from the current shell (not a subshell) so the script stops even
  # when errexit is not enabled.
  exit 1
fi
7184

85+
# Poll until the Katib controller pod's ContainersReady condition is "True".
# Gives up (exit 1) after MAX_RETRY_COUNT polls spaced 2 seconds apart.
MAX_RETRY_COUNT=10
# `jq -r` prints the raw string; without it jq emits the JSON-quoted value
# ("True" including the double quotes), which never equals True.
until [ "$(kubectl get pod -n kubeflow -l katib.kubeflow.org/component=controller -ojson | jq -r '.items[].status.conditions[] | select(.type=="ContainersReady").status')" = "True" ]; do
  echo "Waiting for Katib controller to be ready"
  # Best-effort status dump for the CI log; a failure here must not abort the wait.
  kubectl get pods -n kubeflow || :

  sleep 2
  MAX_RETRY_COUNT=$((MAX_RETRY_COUNT - 1))
  if [ "${MAX_RETRY_COUNT}" -le 0 ]; then
    echo "Failed to set up Katib controller" >&2
    exit 1
  fi
done
97+
7298
# Announce success and list the deployments in the kubeflow namespace.
printf '%s\n' "All Katib components are running." "Katib deployments"
kubectl get deploy -n kubeflow

0 commit comments

Comments
 (0)