Commit fa3298d

fix: resolve issue where skaffold logger could hang indefinitely if k8s job pod wasn't created (#8717)
1 parent: 0367eba

2 files changed, +20 −8 lines

pkg/skaffold/k8sjob/logger/log.go

Lines changed: 19 additions & 7 deletions
@@ -18,6 +18,7 @@ package logger
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"sync"
 	"sync/atomic"
@@ -154,7 +155,6 @@ func (l *Logger) streamLogsFromKubernetesJob(ctx context.Context, id, namespace
 			return true, nil
 		}
 	}
-
 	var podName string
 	w, err := clientset.CoreV1().Pods(namespace).Watch(context.TODO(),
 		metav1.ListOptions{
@@ -164,13 +164,25 @@ func (l *Logger) streamLogsFromKubernetesJob(ctx context.Context, id, namespace
 		return false, nil
 	}
 
-	for event := range w.ResultChan() {
-		pod, ok := event.Object.(*corev1.Pod)
-		if ok {
-			podName = pod.Name
-			// TODO(aaron-prindle) add support for jobs w/ multiple pods in the future
-			break
+	done := make(chan bool)
+	go func() {
+		for event := range w.ResultChan() {
+			pod, ok := event.Object.(*corev1.Pod)
+			if ok {
+				podName = pod.Name
+				done <- true
+				break
+			}
 		}
+	}()
+
+	select {
+	case <-ctx.Done():
+		return false, fmt.Errorf("context cancelled for k8s job logging of pod of kubernetes job: %s", "id")
+	case <-done:
+		// Continue
+	case <-time.After(30 * time.Second): // Timeout after 30 seconds
+		return false, fmt.Errorf("timeout waiting for event from pod of kubernetes job: %s", id)
 	}
 
 	podLogOptions := &corev1.PodLogOptions{
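
The heart of the log.go change: the blocking for ... range w.ResultChan() loop now runs in a goroutine, and the caller waits in a select that can also be released by context cancellation or a 30-second timeout, so log streaming can no longer hang forever when the Job's pod is never created. Below is a minimal, self-contained sketch of that pattern using client-go; the helper name waitForJobPod, the buffered found channel, and the job-name label selector are illustrative assumptions, not skaffold's actual code.

// Sketch only: wait for the first pod of a Kubernetes Job without blocking forever.
// waitForJobPod and the "job-name" selector are assumptions for illustration.
package main

import (
	"context"
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// waitForJobPod returns the name of the first pod created for job `id`, or an
// error if the context is cancelled or no pod event arrives within 30 seconds.
func waitForJobPod(ctx context.Context, clientset kubernetes.Interface, id, namespace string) (string, error) {
	w, err := clientset.CoreV1().Pods(namespace).Watch(ctx, metav1.ListOptions{
		LabelSelector: fmt.Sprintf("job-name=%s", id),
	})
	if err != nil {
		return "", err
	}
	defer w.Stop() // closing the watch ends the goroutine's range loop

	found := make(chan string, 1) // buffered: the sender never blocks if we time out
	go func() {
		for event := range w.ResultChan() {
			if pod, ok := event.Object.(*corev1.Pod); ok {
				found <- pod.Name
				return
			}
		}
	}()

	select {
	case <-ctx.Done():
		return "", fmt.Errorf("context cancelled waiting for pod of kubernetes job %s: %w", id, ctx.Err())
	case name := <-found:
		return name, nil
	case <-time.After(30 * time.Second):
		return "", fmt.Errorf("timeout waiting for event from pod of kubernetes job: %s", id)
	}
}

The buffered channel and the deferred w.Stop() are sketch-level choices that keep the watcher goroutine from outliving the call when the timeout or cancellation branch wins the race.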

pkg/skaffold/verify/k8sjob/verify.go

Lines changed: 1 addition & 1 deletion
@@ -222,7 +222,7 @@ func (v *Verifier) createAndRunJob(ctx context.Context, tc latest.VerifyTestCase
 	// This retrying is added as when attempting to kickoff multiple jobs simultaneously
 	// This is because the k8s API server can be unresponsive when hit with a large
 	// intitial set of Job CREATE requests
-	if waitErr := wait.Poll(100*time.Millisecond, 10*time.Second, func() (bool, error) {
+	if waitErr := wait.Poll(100*time.Millisecond, 30*time.Second, func() (bool, error) {
 		_, err = clientset.BatchV1().Jobs(job.Namespace).Create(ctx, job, metav1.CreateOptions{})
 		if err != nil {
 			return false, nil
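
The verify.go change only widens the Job-creation retry window from 10 to 30 seconds; the retry itself is a wait.Poll loop that reattempts the CREATE request every 100 ms until it succeeds or the deadline passes. A hedged sketch of that retry shape follows; createJobWithRetry is a hypothetical helper name, not skaffold's actual function.

// Sketch only: retry creating a Kubernetes Job until the API server accepts it
// or the 30-second window elapses. createJobWithRetry is a hypothetical helper.
package main

import (
	"context"
	"fmt"
	"time"

	batchv1 "k8s.io/api/batch/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

func createJobWithRetry(ctx context.Context, clientset kubernetes.Interface, job *batchv1.Job) error {
	if waitErr := wait.Poll(100*time.Millisecond, 30*time.Second, func() (bool, error) {
		_, err := clientset.BatchV1().Jobs(job.Namespace).Create(ctx, job, metav1.CreateOptions{})
		if err != nil {
			// Transient refusals (e.g. an API server busy with a burst of Job
			// CREATE requests) are swallowed so the next poll can try again.
			return false, nil
		}
		return true, nil
	}); waitErr != nil {
		return fmt.Errorf("creating job %s: %w", job.Name, waitErr)
	}
	return nil
}

Swallowing the error inside the condition mirrors the diff: every failure is treated as retryable until the overall timeout turns it into a single error from wait.Poll.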
