Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,23 @@ import (
"testing"
"time"

"github.com/google/go-cmp/cmp"
commonv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/common/v1beta1"
v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
)

var testJsonDataPath = filepath.Join("testdata", "JSON")

func TestCollectObservationLog(t *testing.T) {
var (
testJsonDataPath = filepath.Join("testdata", "JSON")
testTextDataPath = filepath.Join("testdata", "TEXT")
)

if err := generateTestFiles(); err != nil {
func TestCollectJsonObservationLog(t *testing.T) {
if err := generateJsonTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(filepath.Dir(testJsonDataPath))

// TODO (tenzen-y): We should add tests for logs in TEXT format.
// Ref: https://github.com/kubeflow/katib/issues/1756
testCases := []struct {
description string
filePath string
Expand Down Expand Up @@ -167,7 +168,131 @@ func TestCollectObservationLog(t *testing.T) {
}
}

func generateTestFiles() error {
func TestCollectTextObservationLog(t *testing.T) {
if err := generateTextTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(filepath.Dir(testTextDataPath))

testCases := map[string]struct {
filePath string
metrics []string
filters []string
fileFormat commonv1beta1.FileFormat
err bool
expected *v1beta1.ObservationLog
}{
"Positive case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "good.log"),
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we mentioned in the issue, our motivation is to verify if this filter works fine to avoid bugs like this: #1754

We should add unit test for our File Metrics Collector to verify log parsing.
That will help to avoid issues similar to: #1754

So, could you add more than valid and invalid cases?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'll do it later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some new test cases similar to bugs in #1754 have been added to the test file.

fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.8078",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.5183",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.6752",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.3634",
},
},
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.8671",
},
},
},
},
},
"Invalid file name": {
filePath: "invalid",
err: true,
},
"Invalid file format": {
filePath: filepath.Join(testTextDataPath, "good.log"),
fileFormat: "invalid",
err: true,
},
"Invalid timestamp for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "invalid-timestamp.log"),
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.6752",
},
},
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.3634",
},
},
},
},
},
"Missing objective metric in training logs": {
filePath: filepath.Join(testTextDataPath, "missing-objective-metric.log"),
fileFormat: commonv1beta1.TextFormat,
metrics: []string{"accuracy", "loss"},
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: consts.UnavailableMetricValue,
},
},
},
},
},
}

for name, test := range testCases {
t.Run(name, func(t *testing.T) {
actual, err := CollectObservationLog(test.filePath, test.metrics, test.filters, test.fileFormat)
if (err != nil) != test.err {
t.Errorf("\nGOT: \n%v\nWANT: %v\n", err, test.err)
} else {
if cmp.Diff(actual, test.expected) != "" {
t.Errorf("Expected %v\n got %v", test.expected, actual)
}
}
})
}
}

func generateJsonTestFiles() error {
if _, err := os.Stat(testJsonDataPath); err != nil {
if err = os.MkdirAll(testJsonDataPath, 0700); err != nil {
return err
Expand Down Expand Up @@ -215,3 +340,43 @@ func generateTestFiles() error {

return nil
}

func generateTextTestFiles() error {
if _, err := os.Stat(testTextDataPath); err != nil {
if err = os.MkdirAll(testTextDataPath, 0700); err != nil {
return err
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move directory creation outside of these functions and create it before we invoke them ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are okay. But keeping them in generateXXXTestFiles will make the code look more tidy.

Copy link
Member

@andreyvelich andreyvelich Mar 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, but since we always run these functions sequentially why do we need to have duplicate code to create directories in each of them ?
Maybe we could have another function called: generateTestDirectory which will create testdata dir.
WDYT @Electronic-Waste @tenzen-y ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andreyvelich That sounds reasonable. Having separate function wouldn't be wroth it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant that consolidating 2 generators into 1 would be better.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, that sounds reasonable. I'll modify it soon.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andreyvelich @tenzen-y I also integrate os.RemoveAll operations into the func deleteTestDirs(). plz cc👀


testData := []struct {
fileName string
data string
}{
{
fileName: "good.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.8078};{metricName: loss, metricValue: 0.5183}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
{metricName: loss, metricValue: 0.8671}`,
},
{
fileName: "invalid-timestamp.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
invalid INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: "missing-objective-metric.log",
data: `2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.8671}`,
},
}

for _, td := range testData {
filePath := filepath.Join(testTextDataPath, td.fileName)
if err := os.WriteFile(filePath, []byte(td.data), 0600); err != nil {
return err
}
}

return nil
}