Skip to content

Commit b8654eb

Browse files
committed
Backend for getting logs of a trial
1 parent 0a1cb31 commit b8654eb

File tree

3 files changed

+103
-0
lines changed

3 files changed

+103
-0
lines changed

cmd/new-ui/v1beta1/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ func main() {
6767
http.HandleFunc("/katib/edit_template/", kuh.EditTemplate)
6868
http.HandleFunc("/katib/delete_template/", kuh.DeleteTemplate)
6969
http.HandleFunc("/katib/fetch_namespaces", kuh.FetchNamespaces)
70+
http.HandleFunc("/katib/fetch_trial_logs/", kuh.FetchTrialLogs)
7071

7172
log.Printf("Serving at %s:%s", *host, *port)
7273
if err := http.ListenAndServe(fmt.Sprintf("%s:%s", *host, *port), nil); err != nil {

manifests/v1beta1/components/ui/rbac.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ rules:
1919
- suggestions
2020
verbs:
2121
- "*"
22+
- apiGroups:
23+
- ""
24+
resources:
25+
- pods
26+
- pods/log
27+
verbs:
28+
- list
29+
- get
2230
---
2331
apiVersion: v1
2432
kind: ServiceAccount

pkg/new-ui/v1beta1/backend.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ limitations under the License.
1717
package v1beta1
1818

1919
import (
20+
"bytes"
21+
"context"
2022
"encoding/json"
23+
"io"
2124
"log"
2225
"net/http"
2326
"path/filepath"
@@ -27,8 +30,15 @@ import (
2730
"sigs.k8s.io/controller-runtime/pkg/client"
2831

2932
experimentv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/experiments/v1beta1"
33+
trialsv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/trials/v1beta1"
3034
api_pb_v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1"
3135
"github.com/kubeflow/katib/pkg/util/v1beta1/katibclient"
36+
37+
apiv1 "k8s.io/api/core/v1"
38+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
39+
"k8s.io/apimachinery/pkg/types"
40+
corev1 "k8s.io/client-go/kubernetes/typed/core/v1"
41+
"sigs.k8s.io/controller-runtime/pkg/client/config"
3242
)
3343

3444
func NewKatibUIHandler(dbManagerAddr string) *KatibUIHandler {
@@ -421,3 +431,87 @@ func (k *KatibUIHandler) FetchTrial(w http.ResponseWriter, r *http.Request) {
421431
return
422432
}
423433
}
434+
435+
// FetchTrialLogs fetches logs for a trial in specific namespace.
436+
func (k *KatibUIHandler) FetchTrialLogs(w http.ResponseWriter, r *http.Request) {
437+
log.Printf("Requesting logs")
438+
439+
trialName := r.URL.Query()["trialName"][0]
440+
namespace := r.URL.Query()["namespace"][0]
441+
log.Printf("Requesting logs")
442+
443+
logs, err := getTrialLogs(k, trialName, namespace)
444+
if err != nil {
445+
log.Printf("GetLogs failed: %v", err)
446+
http.Error(w, err.Error(), http.StatusInternalServerError)
447+
return
448+
}
449+
response, err := json.Marshal(logs)
450+
if err != nil {
451+
log.Printf("Marshal logs failed: %v", err)
452+
http.Error(w, err.Error(), http.StatusInternalServerError)
453+
return
454+
}
455+
w.Write(response)
456+
}
457+
458+
// GetTrialLogs returns logs of a master Pod for the given job name and namespace
459+
func getTrialLogs(k *KatibUIHandler, trialName string, namespace string) (string, error) {
460+
cfg, err := config.GetConfig()
461+
if err != nil {
462+
return "", err
463+
}
464+
465+
clientset, err := corev1.NewForConfig(cfg)
466+
if err != nil {
467+
return "", err
468+
}
469+
470+
trial := &trialsv1beta1.Trial{}
471+
if err := k.katibClient.GetClient().Get(context.TODO(), types.NamespacedName{Name: trialName, Namespace: namespace}, trial); err != nil {
472+
return "", err
473+
}
474+
475+
selectionLabel := "training.kubeflow.org/job-name=" + trialName + ",training.kubeflow.org/job-role=master"
476+
if trial.Spec.RunSpec.GetKind() == "Job" {
477+
selectionLabel = "job-name=" + trialName
478+
}
479+
480+
podList, err := clientset.Pods(namespace).List(context.Background(), metav1.ListOptions{LabelSelector: selectionLabel})
481+
if err != nil {
482+
return "", err
483+
}
484+
485+
if len(podList.Items) == 0 {
486+
message := `Logs for the trial could not be found.
487+
Was 'retain: true' specified in the Experiment definition?
488+
An example can be found here: https://github.com/kubeflow/katib/blob/master/examples/v1beta1/argo/argo-workflow.yaml#L33`
489+
490+
return message, nil
491+
}
492+
493+
podLogOpts := apiv1.PodLogOptions{}
494+
podLogOpts.Container = trial.Spec.PrimaryContainerName
495+
for container := range podList.Items[0].Spec.Containers {
496+
if podList.Items[0].Spec.Containers[container].Name == "metrics-logger-and-collector" {
497+
podLogOpts.Container = "metrics-logger-and-collector"
498+
break
499+
}
500+
}
501+
502+
req := clientset.Pods(namespace).GetLogs(podList.Items[0].Name, &podLogOpts)
503+
podLogs, err := req.Stream(context.Background())
504+
if err != nil {
505+
return "", err
506+
}
507+
defer podLogs.Close()
508+
509+
buf := new(bytes.Buffer)
510+
_, err = io.Copy(buf, podLogs)
511+
if err != nil {
512+
return "", err
513+
}
514+
str := buf.String()
515+
516+
return str, nil
517+
}

0 commit comments

Comments
 (0)