Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package sidecarmetricscollector

import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
Expand All @@ -34,15 +35,27 @@ import (
"github.com/kubeflow/katib/pkg/metricscollector/v1beta1/common"
)

var (
errFileFormat = fmt.Errorf("format must be set %v or %v", commonv1beta1.TextFormat, commonv1beta1.JsonFormat)
errOpenFile = errors.New("failed to open the file")
errReadFile = errors.New("failed to read the file")
errParseJson = errors.New("failed to parse the json object")
)

func CollectObservationLog(fileName string, metrics []string, filters []string, fileFormat commonv1beta1.FileFormat) (*v1beta1.ObservationLog, error) {
// we should check fileFormat first in case of opening an invalid file
if fileFormat != commonv1beta1.JsonFormat && fileFormat != commonv1beta1.TextFormat {
return nil, errFileFormat
}

file, err := os.Open(fileName)
if err != nil {
return nil, err
return nil, fmt.Errorf("%w: %s", errOpenFile, err.Error())
}
defer file.Close()
content, err := io.ReadAll(file)
if err != nil {
return nil, err
return nil, fmt.Errorf("%w: %s", errReadFile, err.Error())
}
logs := string(content)

Expand All @@ -51,8 +64,9 @@ func CollectObservationLog(fileName string, metrics []string, filters []string,
return parseLogsInTextFormat(strings.Split(logs, "\n"), metrics, filters)
case commonv1beta1.JsonFormat:
return parseLogsInJsonFormat(strings.Split(logs, "\n"), metrics)
default:
return nil, nil
}
return nil, fmt.Errorf("format must be set %v or %v", commonv1beta1.TextFormat, commonv1beta1.JsonFormat)
}

func parseLogsInTextFormat(logs []string, metrics []string, filters []string) (*v1beta1.ObservationLog, error) {
Expand Down Expand Up @@ -120,7 +134,7 @@ func parseLogsInJsonFormat(logs []string, metrics []string) (*v1beta1.Observatio
}
var jsonObj map[string]interface{}
if err := json.Unmarshal([]byte(logline), &jsonObj); err != nil {
return nil, err
return nil, fmt.Errorf("%w: %s", errParseJson, err.Error())
}

timestamp := time.Time{}.UTC().Format(time.RFC3339)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,52 +23,29 @@ import (
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
commonv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/common/v1beta1"
v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
)

const (
validJSONTestFile = "good.json"
invalidFormatJSONTestFile = "invalid-format.json"
invalidTimestampJSONTestFile = "invalid-timestamp.json"
missingMetricJSONTestFile = "missing-objective-metric.json"

validTEXTTestFile = "good.log"
invalidValueTEXTTestFile = "invalid-value.log"
invalidFormatTEXTTestFile = "invalid-format.log"
invalidTimestampTEXTTestFile = "invalid-timestamp.log"
missingMetricTEXTTestFile = "missing-objective-metric.log"
)

var (
testDir = "testdata"
testJsonDataPath = filepath.Join(testDir, "JSON")
testTextDataPath = filepath.Join(testDir, "TEXT")
)

func TestCollectObservationLog(t *testing.T) {
if err := generateTestDirs(); err != nil {
t.Fatal(err)
}
if err := generateJSONTestFiles(); err != nil {
t.Fatal(err)
}
if err := generateTEXTTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(testDir)

testCases := map[string]struct {
filePath string
fileName string
testData string
metrics []string
filters []string
fileFormat commonv1beta1.FileFormat
err bool
wantError error
expected *v1beta1.ObservationLog
}{
"Positive case for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, validJSONTestFile),
fileName: "good.json",
testData: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161Z", "trial": "0"}
{"acc": "0.9586416482925415", "checkpoint_path": "", "global_step": "1", "timestamp": "2021-12-02T14:27:50.000037459Z", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}`,
metrics: []string{"acc", "loss"},
fileFormat: commonv1beta1.JsonFormat,
expected: &v1beta1.ObservationLog{
Expand Down Expand Up @@ -112,7 +89,14 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Positive case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, validTEXTTestFile),
fileName: "good.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.8078};{metricName: loss, metricValue: 0.5183}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 100}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -0.4759}
{metricName: loss, metricValue: 0.8671}`,
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
fileFormat: commonv1beta1.TextFormat,
Expand Down Expand Up @@ -178,7 +162,11 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, invalidValueTEXTTestFile),
fileName: "invalid-value.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: .333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: - 345.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.}`,
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
metrics: []string{"accuracy", "loss"},
fileFormat: commonv1beta1.TextFormat,
Expand All @@ -195,21 +183,26 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid file name": {
filePath: "invalid",
err: true,
fileName: "invalid",
fileFormat: commonv1beta1.JsonFormat,
wantError: errOpenFile,
},
"Invalid file format": {
filePath: filepath.Join(testTextDataPath, validTEXTTestFile),
fileName: "good.log",
fileFormat: "invalid",
err: true,
wantError: errFileFormat,
},
"Invalid formatted file for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, invalidFormatJSONTestFile),
fileName: "invalid-format.json",
testData: `"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0`,
fileFormat: commonv1beta1.JsonFormat,
err: true,
wantError: errParseJson,
},
"Invalid formatted file for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, invalidFormatTEXTTestFile),
fileName: "invalid-format.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}`,
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
metrics: []string{"accuracy", "loss"},
fileFormat: commonv1beta1.TextFormat,
Expand All @@ -226,7 +219,9 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid timestamp for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, invalidTimestampJSONTestFile),
fileName: "invalid-timestamp.json",
testData: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": "invalid", "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847, "trial": "0"}`,
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
expected: &v1beta1.ObservationLog{
Expand All @@ -249,7 +244,9 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid timestamp for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, invalidTimestampTEXTTestFile),
fileName: "invalid-timestamp.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
invalid INFO {metricName: loss, metricValue: 0.3634}`,
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
fileFormat: commonv1beta1.TextFormat,
Expand All @@ -273,7 +270,10 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Missing objective metric in JSON training logs": {
filePath: filepath.Join(testJsonDataPath, missingMetricJSONTestFile),
fileName: "missing-objective-metric.json",
testData: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161+09:00", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}`,
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
expected: &v1beta1.ObservationLog{
Expand All @@ -289,7 +289,9 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Missing objective metric in TEXT training logs": {
filePath: filepath.Join(testTextDataPath, missingMetricTEXTTestFile),
fileName: "missing-objective-metric.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.8671}`,
fileFormat: commonv1beta1.TextFormat,
metrics: []string{"accuracy", "loss"},
expected: &v1beta1.ObservationLog{
Expand All @@ -306,126 +308,21 @@ func TestCollectObservationLog(t *testing.T) {
},
}

tmpDir := t.TempDir()
for name, test := range testCases {
t.Run(name, func(t *testing.T) {
actual, err := CollectObservationLog(test.filePath, test.metrics, test.filters, test.fileFormat)
if (err != nil) != test.err {
t.Errorf("\nGOT: \n%v\nWANT: %v\n", err, test.err)
} else {
if diff := cmp.Diff(test.expected, actual); diff != "" {
t.Errorf("Unexpected parsed result (-want,+got):\n%s", diff)
if test.testData != "" {
if err := os.WriteFile(filepath.Join(tmpDir, test.fileName), []byte(test.testData), 0600); err != nil {
t.Fatalf("failed to write test data: %v", err)
}
}
actual, err := CollectObservationLog(filepath.Join(tmpDir, test.fileName), test.metrics, test.filters, test.fileFormat)
if diff := cmp.Diff(test.wantError, err, cmpopts.EquateErrors()); len(diff) != 0 {
t.Errorf("Unexpected error (-want,+got):\n%s", diff)
}
if diff := cmp.Diff(test.expected, actual); len(diff) != 0 {
t.Errorf("Unexpected parsed result (-want,+got):\n%s", diff)
}
})
}
}

func generateTestDirs() error {
// Generate JSON files' dir
if _, err := os.Stat(testJsonDataPath); err != nil {
if err = os.MkdirAll(testJsonDataPath, 0700); err != nil {
return err
}
}

// Generate TEXT files' dir
if _, err := os.Stat(testTextDataPath); err != nil {
if err = os.MkdirAll(testTextDataPath, 0700); err != nil {
return err
}
}

return nil
}

func generateJSONTestFiles() error {
testData := []struct {
fileName string
data string
}{
{
fileName: validJSONTestFile,
data: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161Z", "trial": "0"}
{"acc": "0.9586416482925415", "checkpoint_path": "", "global_step": "1", "timestamp": "2021-12-02T14:27:50.000037459Z", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}
`,
},
{
fileName: invalidFormatJSONTestFile,
data: `"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0
`,
},
{
fileName: invalidTimestampJSONTestFile,
data: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": "invalid", "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847, "trial": "0"}
`,
}, {
fileName: missingMetricJSONTestFile,
data: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161+09:00", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}`,
},
}

for _, td := range testData {
filePath := filepath.Join(testJsonDataPath, td.fileName)
if err := os.WriteFile(filePath, []byte(td.data), 0600); err != nil {
return err
}
}

return nil
}

func generateTEXTTestFiles() error {
testData := []struct {
fileName string
data string
}{
{
fileName: validTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.8078};{metricName: loss, metricValue: 0.5183}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 100}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -0.4759}
{metricName: loss, metricValue: 0.8671}`,
},
{
fileName: invalidValueTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: .333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: - 345.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.}`,
},
{
fileName: invalidFormatTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: invalidTimestampTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
invalid INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: missingMetricTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.8671}`,
},
}

for _, td := range testData {
filePath := filepath.Join(testTextDataPath, td.fileName)
if err := os.WriteFile(filePath, []byte(td.data), 0600); err != nil {
return err
}
}

return nil
}