Skip to content

Commit e391421

Browse files
kaikailaVaniHaripriya
authored and committed
refactor(backend): introduce dialect-aware LargeText and replace longtext (+call-site type adjustments). Part of kubeflow#12063 (kubeflow#12163)
Signed-off-by: kaikaila <[email protected]>
1 parent 63a50c4 commit e391421

34 files changed

+374
-296
lines changed

backend/src/apiserver/config/config.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ func LoadSamples(resourceManager *resource.ResourceManager, sampleConfigPath str
141141
p, configErr = resourceManager.CreatePipeline(&model.Pipeline{
142142
Name: cfg.Name,
143143
DisplayName: pipelineDisplayName,
144-
Description: cfg.Description,
144+
Description: model.LargeText(cfg.Description),
145145
})
146146
if configErr != nil {
147147
// Log the error but do not fail. The API Server pod can restart and it could potentially cause
@@ -191,9 +191,9 @@ func LoadSamples(resourceManager *resource.ResourceManager, sampleConfigPath str
191191
&model.PipelineVersion{
192192
Name: pvName,
193193
DisplayName: pvDisplayName,
194-
Description: pvDescription,
194+
Description: model.LargeText(pvDescription),
195195
PipelineId: p.UUID,
196-
PipelineSpec: string(pipelineFile),
196+
PipelineSpec: model.LargeText(string(pipelineFile)),
197197
},
198198
)
199199
if configErr != nil {
Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,74 @@
1+
// Copyright 2025 The Kubeflow Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package model
16+
17+
import (
18+
"database/sql/driver"
19+
"encoding/json"
20+
"fmt"
21+
22+
"gorm.io/gorm"
23+
"gorm.io/gorm/schema"
24+
)
25+
26+
// LargeText is a custom data type defined per GORM's recommendation for dialect-aware
27+
// large-text columns. It implements GormDBDataTypeInterface to return the appropriate
28+
// SQL type for each dialect (e.g., LONGTEXT for MySQL, TEXT for others).
29+
// For details, see https://gorm.io/docs/data_types.html#GormDataTypeInterface
30+
type LargeText string
31+
32+
func (LargeText) GormDBDataType(db *gorm.DB, field *schema.Field) string {
33+
switch db.Name() {
34+
case "mysql":
35+
return "LONGTEXT"
36+
default:
37+
return "TEXT"
38+
}
39+
}
40+
41+
func (lt LargeText) String() string {
42+
return string(lt)
43+
}
44+
45+
func (lt LargeText) Value() (driver.Value, error) {
46+
return string(lt), nil
47+
}
48+
49+
func (lt *LargeText) Scan(src any) error {
50+
switch v := src.(type) {
51+
case string:
52+
*lt = LargeText(v)
53+
case []byte:
54+
*lt = LargeText(string(v))
55+
case nil:
56+
*lt = ""
57+
default:
58+
return fmt.Errorf("unsupported type %T for LargeText", v)
59+
}
60+
return nil
61+
}
62+
63+
func (lt LargeText) MarshalJSON() ([]byte, error) {
64+
return json.Marshal(string(lt))
65+
}
66+
67+
func (lt *LargeText) UnmarshalJSON(b []byte) error {
68+
var s string
69+
if err := json.Unmarshal(b, &s); err != nil {
70+
return err
71+
}
72+
*lt = LargeText(s)
73+
return nil
74+
}

backend/src/apiserver/model/pipeline.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,11 +41,11 @@ type Pipeline struct {
4141
// Even though Namespace rarely uses its full 63-character capacity in practice,
4242
// MySQL calculates index length based on declared size, not actual content.
4343
// Therefore, keeping Name at varchar(128) is a safe upper bound.
44-
Name string `gorm:"column:Name; not null; uniqueIndex:namespace_name; type:varchar(128);"` // Index improves performance of the List and Get queries
45-
DisplayName string `gorm:"column:DisplayName; not null"`
46-
Description string `gorm:"column:Description; type:longtext; not null"`
44+
Name string `gorm:"column:Name; not null; uniqueIndex:namespace_name; type:varchar(128);"` // Index improves performance of the List and Get queries
45+
DisplayName string `gorm:"column:DisplayName; not null"`
46+
Description LargeText `gorm:"column:Description; not null"`
4747
// TODO(gkcalat): this is deprecated. Consider removing and adding data migration logic at the server startup.
48-
Parameters string `gorm:"column:Parameters; type:longtext;"`
48+
Parameters LargeText `gorm:"column:Parameters;"`
4949
Status PipelineStatus `gorm:"column:Status; not null;"`
5050
// TODO(gkcalat): this is deprecated. Consider removing and adding data migration logic at the server startup.
5151
DefaultVersionId string `gorm:"column:DefaultVersionId;"` // deprecated

backend/src/apiserver/model/pipeline_spec.go

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,20 +28,21 @@ type PipelineSpec struct {
2828

2929
// Pipeline YAML definition. This is the pipeline interface for creating a pipeline.
3030
// Stored as longtext to support large manifests (up to 4GB in MySQL).
31+
// Stored as text in PostgreSQL.
3132
// https://dev.mysql.com/doc/refman/8.0/en/blob.html
3233
// TODO(kaikaila): consider enforcing a soft limit if needed for performance.
33-
PipelineSpecManifest string `gorm:"column:PipelineSpecManifest; type:longtext;"`
34+
PipelineSpecManifest LargeText `gorm:"column:PipelineSpecManifest;"`
3435

3536
// Argo workflow YAML definition. This is the Argo Spec converted from Pipeline YAML.
3637
// This is deprecated. Use the pipeline ID, pipeline version ID, or pipeline spec manifest.
37-
WorkflowSpecManifest string `gorm:"column:WorkflowSpecManifest; type:longtext;"`
38+
WorkflowSpecManifest LargeText `gorm:"column:WorkflowSpecManifest;"`
3839

3940
// Store parameters key-value pairs as serialized string.
4041
// This field is only used for V1 API. For V2, use the `Parameters` field in RuntimeConfig.
4142
// At most one of the fields `Parameters` and `RuntimeConfig` can be non-empty
4243
// This string stores an array of map[string]value. For example:
4344
// {"param1": Value1} will be stored as [{"name": "param1", "value":"value1"}].
44-
Parameters string `gorm:"column:Parameters; type:longtext;"`
45+
Parameters LargeText `gorm:"column:Parameters;"`
4546

4647
// Runtime config of the pipeline, only used for v2 template in API v1beta1 API.
4748
RuntimeConfig

backend/src/apiserver/model/pipeline_version.go

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ type PipelineVersion struct {
3737
Name string `gorm:"column:Name; not null; type:varchar(127); uniqueIndex:idx_pipelineid_name;"`
3838
DisplayName string `gorm:"column:DisplayName; not null"`
3939
// TODO(gkcalat): this is deprecated. Consider removing and adding data migration logic at the server startup.
40-
Parameters string `gorm:"column:Parameters; not null; type:longtext;"` // deprecated
40+
Parameters LargeText `gorm:"column:Parameters; not null;"` // deprecated
4141
// PipelineVersion belongs to Pipeline. If a pipeline with a specific UUID
4242
// is deleted from Pipeline table, all this pipeline's versions will be
4343
// deleted from PipelineVersion table.
@@ -47,10 +47,11 @@ type PipelineVersion struct {
4747
Pipeline Pipeline `gorm:"foreignKey:PipelineId; references:UUID;constraint:pipeline_versions_PipelineId_pipelines_UUID_foreign,OnDelete:CASCADE,OnUpdate:CASCADE"` // This 'belongs to' relation replaces the legacy AddForeignKey constraint previously defined in client_manager.go
4848
Status PipelineVersionStatus `gorm:"column:Status; not null;"`
4949
// Code source url links to the pipeline version's definition in repo.
50-
CodeSourceUrl string `gorm:"column:CodeSourceUrl;"`
51-
Description string `gorm:"column:Description; type:longtext;"`
52-
PipelineSpec string `gorm:"column:PipelineSpec; not null; type:longtext;"` // Same as common.MaxFileLength (32MB in server). Argo imposes 700kB limit
53-
PipelineSpecURI string `gorm:"column:PipelineSpecURI; not null; type:longtext;"` // Can store references to ObjectStore files
50+
// nolint:staticcheck // [ST1003] Field name matches upstream legacy naming
51+
CodeSourceUrl string `gorm:"column:CodeSourceUrl;"`
52+
Description LargeText `gorm:"column:Description;"`
53+
PipelineSpec LargeText `gorm:"column:PipelineSpec; not null;"` // Same as common.MaxFileLength (32MB in server). Argo imposes 700kB limit
54+
PipelineSpecURI LargeText `gorm:"column:PipelineSpecURI; not null;"` // Can store references to ObjectStore files
5455
}
5556

5657
func (p PipelineVersion) GetValueOfPrimaryKey() string {

backend/src/apiserver/model/resource_reference.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ type ResourceReference struct {
168168
Relationship Relationship `gorm:"column:Relationship; not null;"`
169169

170170
// JSON-encoded metadata blob about the reference
171-
Payload string `gorm:"column:Payload; not null; type: longtext"`
171+
Payload LargeText `gorm:"column:Payload; not null;"`
172172
}
173173

174174
type ReferenceKey struct {

backend/src/apiserver/model/run.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -317,12 +317,12 @@ type RunDetails struct {
317317
// For details on type lengths and index safety, refer to comments in the Pipeline struct.
318318
Conditions string `gorm:"column:Conditions; type:varchar(125); not null; index:experimentuuid_conditions_finishedatinsec,priority:2;index:namespace_conditions_finishedatinsec,priority:2"`
319319
State RuntimeState `gorm:"column:State; default:null;"`
320-
StateHistoryString string `gorm:"column:StateHistory; default:null; type: longtext;"`
320+
StateHistoryString LargeText `gorm:"column:StateHistory; default:null;"`
321321
StateHistory []*RuntimeStatus `gorm:"-;"`
322322
// Serialized runtime details of a run in v2beta1
323-
PipelineRuntimeManifest string `gorm:"column:PipelineRuntimeManifest; not null; type:longtext;"`
323+
PipelineRuntimeManifest LargeText `gorm:"column:PipelineRuntimeManifest; not null;"`
324324
// Serialized Argo CRD in v1beta1
325-
WorkflowRuntimeManifest string `gorm:"column:WorkflowRuntimeManifest; not null; type:longtext;"`
325+
WorkflowRuntimeManifest LargeText `gorm:"column:WorkflowRuntimeManifest; not null;"`
326326
// nolint:staticcheck // [ST1003] Field name matches upstream legacy naming
327327
PipelineContextId int64 `gorm:"column:PipelineContextId; default:0;"`
328328
// nolint:staticcheck // [ST1003] Field name matches upstream legacy naming
@@ -333,12 +333,12 @@ type RunDetails struct {
333333
}
334334

335335
type RunMetric struct {
336-
RunUUID string `gorm:"column:RunUUID; not null; primaryKey; type:varchar(191);"`
337-
NodeID string `gorm:"column:NodeID; not null; primaryKey; type:varchar(191);"`
338-
Name string `gorm:"column:Name; not null; primaryKey; type:varchar(191);"`
339-
NumberValue float64 `gorm:"column:NumberValue;"`
340-
Format string `gorm:"column:Format;"`
341-
Payload string `gorm:"column:Payload; not null; type:longtext;"`
336+
RunUUID string `gorm:"column:RunUUID; not null; primaryKey; type:varchar(191);"`
337+
NodeID string `gorm:"column:NodeID; not null; primaryKey; type:varchar(191);"`
338+
Name string `gorm:"column:Name; not null; primaryKey; type:varchar(191);"`
339+
NumberValue float64 `gorm:"column:NumberValue;"`
340+
Format string `gorm:"column:Format;"`
341+
Payload LargeText `gorm:"column:Payload; not null;"`
342342
}
343343

344344
type RuntimeStatus struct {

backend/src/apiserver/model/runtime_config.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,10 @@ package model
1616

1717
type RuntimeConfig struct {
1818
// Store parameters key-value pairs as serialized string.
19-
Parameters string `gorm:"column:RuntimeParameters; type:longtext;"`
19+
Parameters LargeText `gorm:"column:RuntimeParameters;"`
2020

2121
// A path in an object store bucket which will be treated as the root
2222
// output directory of the pipeline. It is used by the system to
2323
// generate the paths of output artifacts. Ref:(https://www.kubeflow.org/docs/components/pipelines/pipeline-root/)
24-
PipelineRoot string `gorm:"column:PipelineRoot; type:longtext;"`
24+
PipelineRoot LargeText `gorm:"column:PipelineRoot;"`
2525
}

backend/src/apiserver/model/task.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,13 @@ type Task struct {
3737
Name string `gorm:"column:Name; default:null"`
3838
ParentTaskId string `gorm:"column:ParentTaskUUID; default:null"`
3939
State RuntimeState `gorm:"column:State; default:null;"`
40-
StateHistoryString string `gorm:"column:StateHistory; default:null; type:longtext;"`
41-
MLMDInputs string `gorm:"column:MLMDInputs; default:null; type:longtext;"`
42-
MLMDOutputs string `gorm:"column:MLMDOutputs; default:null; type:longtext;"`
43-
ChildrenPodsString string `gorm:"column:ChildrenPods; default:null; type:longtext;"`
40+
StateHistoryString LargeText `gorm:"column:StateHistory; default:null;"`
41+
MLMDInputs LargeText `gorm:"column:MLMDInputs; default:null;"`
42+
MLMDOutputs LargeText `gorm:"column:MLMDOutputs; default:null;"`
43+
ChildrenPodsString LargeText `gorm:"column:ChildrenPods; default:null;"`
4444
StateHistory []*RuntimeStatus `gorm:"-;"`
4545
ChildrenPods []string `gorm:"-;"`
46-
Payload string `gorm:"column:Payload; default:null; type:longtext;"`
46+
Payload LargeText `gorm:"column:Payload; default:null;"`
4747
}
4848

4949
func (t Task) ToString() string {

0 commit comments

Comments
 (0)