Skip to content

Commit 67b0320

Browse files
authored
feat(bigquery): load job and external table opts for custom time format, null markers and source column match (#12470)
1 parent a914ebe commit 67b0320

File tree

4 files changed

+135
-4
lines changed

4 files changed

+135
-4
lines changed

bigquery/external.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,27 @@ type ExternalDataConfig struct {
133133
// Metadata Cache Mode for the table. Set this to
134134
// enable caching of metadata from external data source.
135135
MetadataCacheMode MetadataCacheMode
136+
137+
// Time zone used when parsing timestamp values that do not
138+
// have specific time zone information (e.g. 2024-04-20 12:34:56).
139+
// The expected format is an IANA timezone string (e.g. America/Los_Angeles).
140+
TimeZone string
141+
142+
// Format used to parse DATE values. Supports C-style and
143+
// SQL-style values.
144+
DateFormat string
145+
146+
// Format used to parse DATETIME values. Supports
147+
// C-style and SQL-style values.
148+
DatetimeFormat string
149+
150+
// Format used to parse TIME values. Supports C-style and
151+
// SQL-style values.
152+
TimeFormat string
153+
154+
// Format used to parse TIMESTAMP values. Supports
155+
// C-style and SQL-style values.
156+
TimestampFormat string
136157
}
137158

138159
func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
@@ -147,6 +168,11 @@ func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
147168
ConnectionId: e.ConnectionID,
148169
ReferenceFileSchemaUri: e.ReferenceFileSchemaURI,
149170
MetadataCacheMode: string(e.MetadataCacheMode),
171+
TimeZone: e.TimeZone,
172+
DateFormat: e.DateFormat,
173+
DatetimeFormat: e.DatetimeFormat,
174+
TimeFormat: e.TimeFormat,
175+
TimestampFormat: e.TimestampFormat,
150176
}
151177
if e.Schema != nil {
152178
q.Schema = e.Schema.toBQ()
@@ -173,6 +199,11 @@ func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfi
173199
ConnectionID: q.ConnectionId,
174200
ReferenceFileSchemaURI: q.ReferenceFileSchemaUri,
175201
MetadataCacheMode: MetadataCacheMode(q.MetadataCacheMode),
202+
TimeZone: q.TimeZone,
203+
TimestampFormat: q.TimestampFormat,
204+
TimeFormat: q.TimeFormat,
205+
DateFormat: q.DateFormat,
206+
DatetimeFormat: q.DatetimeFormat,
176207
}
177208
for _, v := range q.DecimalTargetTypes {
178209
e.DecimalTargetTypes = append(e.DecimalTargetTypes, DecimalTargetType(v))
@@ -257,11 +288,26 @@ type CSVOptions struct {
257288

258289
// An optional custom string that will represent a NULL
259290
// value in CSV import data.
291+
//
292+
// NullMarker and NullMarkers are mutually exclusive and should not be set at the same time.
260293
NullMarker string
261294

295+
// An optional list of custom strings that will represent
296+
// a NULL value in CSV import data.
297+
//
298+
// NullMarker and NullMarkers are mutually exclusive and should not be set at the same time.
299+
NullMarkers []string
300+
262301
// Preserves the embedded ASCII control characters (the first 32 characters in the ASCII-table,
263302
// from '\\x00' to '\\x1F') when loading from CSV. Only applicable to CSV, ignored for other formats.
264303
PreserveASCIIControlCharacters bool
304+
305+
// SourceColumnMatch controls the strategy used to match loaded columns to the schema.
306+
// If not set, a sensible default is chosen based on how the schema is provided. If
307+
// autodetect is used, then columns are matched by name. Otherwise, columns
308+
// are matched by position. This is done to keep the behavior
309+
// backward-compatible.
310+
SourceColumnMatch SourceColumnMatch
265311
}
266312

267313
func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) {
@@ -273,6 +319,8 @@ func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration)
273319
Quote: o.quote(),
274320
SkipLeadingRows: o.SkipLeadingRows,
275321
NullMarker: o.NullMarker,
322+
NullMarkers: o.NullMarkers,
323+
SourceColumnMatch: string(o.SourceColumnMatch),
276324
PreserveAsciiControlCharacters: o.PreserveASCIIControlCharacters,
277325
}
278326
}
@@ -306,6 +354,8 @@ func bqToCSVOptions(q *bq.CsvOptions) *CSVOptions {
306354
FieldDelimiter: q.FieldDelimiter,
307355
SkipLeadingRows: q.SkipLeadingRows,
308356
NullMarker: q.NullMarker,
357+
NullMarkers: q.NullMarkers,
358+
SourceColumnMatch: SourceColumnMatch(q.SourceColumnMatch),
309359
PreserveASCIIControlCharacters: q.PreserveAsciiControlCharacters,
310360
}
311361
o.setQuote(q.Quote)

bigquery/external_test.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ func TestExternalDataConfig(t *testing.T) {
3939
FieldDelimiter: "f",
4040
Quote: "q",
4141
SkipLeadingRows: 3,
42-
NullMarker: "marker",
42+
NullMarkers: []string{"marker"},
43+
SourceColumnMatch: SourceColumnMatchPosition,
4344
},
4445
ConnectionID: "connection",
4546
},
@@ -103,6 +104,13 @@ func TestExternalDataConfig(t *testing.T) {
103104
SourceFormat: JSON,
104105
MetadataCacheMode: Automatic,
105106
},
107+
{
108+
TimeZone: "America/Los_Angeles",
109+
TimestampFormat: "%a %b %e %I:%M:%S %Y",
110+
TimeFormat: "%I:%M:%S",
111+
DateFormat: "%A %b %e %Y",
112+
DatetimeFormat: "%a %b %e %I:%M:%S %Y",
113+
},
106114
} {
107115
q := want.toBQ()
108116
got, err := bqToExternalDataConfig(&q)

bigquery/file.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,27 @@ type FileConfig struct {
8080

8181
// Additional options for Avro files.
8282
AvroOptions *AvroOptions
83+
84+
// Time zone used when parsing timestamp values that do not
85+
// have specific time zone information (e.g. 2024-04-20 12:34:56).
86+
// The expected format is an IANA timezone string (e.g. America/Los_Angeles).
87+
TimeZone string
88+
89+
// Format used to parse DATE values. Supports C-style and
90+
// SQL-style values.
91+
DateFormat string
92+
93+
// Format used to parse DATETIME values. Supports
94+
// C-style and SQL-style values.
95+
DatetimeFormat string
96+
97+
// Format used to parse TIME values. Supports C-style and
98+
// SQL-style values.
99+
TimeFormat string
100+
101+
// Format used to parse TIMESTAMP values. Supports
102+
// C-style and SQL-style values.
103+
TimestampFormat string
83104
}
84105

85106
func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
@@ -93,6 +114,8 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
93114
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
94115
conf.MaxBadRecords = fc.MaxBadRecords
95116
conf.NullMarker = fc.NullMarker
117+
conf.NullMarkers = fc.NullMarkers
118+
conf.SourceColumnMatch = string(fc.SourceColumnMatch)
96119
conf.PreserveAsciiControlCharacters = fc.PreserveASCIIControlCharacters
97120
if fc.Schema != nil {
98121
conf.Schema = fc.Schema.toBQ()
@@ -107,6 +130,11 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
107130
conf.UseAvroLogicalTypes = fc.AvroOptions.UseAvroLogicalTypes
108131
}
109132
conf.Quote = fc.quote()
133+
conf.TimeZone = fc.TimeZone
134+
conf.TimeFormat = fc.TimeFormat
135+
conf.TimestampFormat = fc.TimestampFormat
136+
conf.DatetimeFormat = fc.DatetimeFormat
137+
conf.DateFormat = fc.DateFormat
110138
}
111139

112140
func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
@@ -120,7 +148,14 @@ func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
120148
fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
121149
fc.Encoding = Encoding(conf.Encoding)
122150
fc.FieldDelimiter = conf.FieldDelimiter
151+
fc.TimeZone = conf.TimeZone
152+
fc.TimeFormat = conf.TimeFormat
153+
fc.TimestampFormat = conf.TimestampFormat
154+
fc.DatetimeFormat = conf.DatetimeFormat
155+
fc.DateFormat = conf.DateFormat
123156
fc.CSVOptions.NullMarker = conf.NullMarker
157+
fc.CSVOptions.NullMarkers = conf.NullMarkers
158+
fc.CSVOptions.SourceColumnMatch = SourceColumnMatch(conf.SourceColumnMatch)
124159
fc.CSVOptions.PreserveASCIIControlCharacters = conf.PreserveAsciiControlCharacters
125160
fc.CSVOptions.setQuote(conf.Quote)
126161
}
@@ -165,3 +200,21 @@ const (
165200
// ISO_8859_1 specifies the ISO-8859-1 encoding type.
166201
ISO_8859_1 Encoding = "ISO-8859-1"
167202
)
203+
204+
// SourceColumnMatch indicates the strategy used to match loaded columns to the schema.
205+
type SourceColumnMatch string
206+
207+
const (
208+
// SourceColumnMatchUnspecified keeps the default behavior, which is to use
209+
// sensible defaults based on how the schema is provided. If autodetect
210+
// is used, then columns are matched by name. Otherwise, columns are matched
211+
// by position. This is done to keep the behavior backward-compatible.
212+
SourceColumnMatchUnspecified SourceColumnMatch = "SOURCE_COLUMN_MATCH_UNSPECIFIED"
213+
214+
// SourceColumnMatchPosition matches by position. This assumes that the columns are ordered the same
215+
// way as the schema.
216+
SourceColumnMatchPosition SourceColumnMatch = "POSITION"
217+
// SourceColumnMatchName matches by name. This reads the header row as column names and reorders
218+
// columns to match the field names in the schema.
219+
SourceColumnMatchName SourceColumnMatch = "NAME"
220+
)

bigquery/file_test.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ var (
3939
AllowJaggedRows: true,
4040
AllowQuotedNewlines: true,
4141
Encoding: UTF_8,
42-
NullMarker: "marker",
42+
NullMarkers: []string{"marker"},
4343
PreserveASCIIControlCharacters: true,
44+
SourceColumnMatch: SourceColumnMatchPosition,
4445
},
4546
}
4647
)
@@ -73,8 +74,9 @@ func TestFileConfigPopulateLoadConfig(t *testing.T) {
7374
Encoding: "UTF-8",
7475
MaxBadRecords: 7,
7576
IgnoreUnknownValues: true,
76-
NullMarker: "marker",
77+
NullMarkers: []string{"marker"},
7778
PreserveAsciiControlCharacters: true,
79+
SourceColumnMatch: "POSITION",
7880
Schema: &bq.TableSchema{
7981
Fields: []*bq.TableFieldSchema{
8082
bqStringFieldSchema(),
@@ -113,6 +115,23 @@ func TestFileConfigPopulateLoadConfig(t *testing.T) {
113115
UseAvroLogicalTypes: true,
114116
},
115117
},
118+
{
119+
description: "Custom date/datetime/time/timestamp formats",
120+
fileConfig: &FileConfig{
121+
TimeZone: "America/Los_Angeles",
122+
TimestampFormat: "%a %b %e %I:%M:%S %Y",
123+
TimeFormat: "%I:%M:%S",
124+
DateFormat: "%A %b %e %Y",
125+
DatetimeFormat: "%a %b %e %I:%M:%S %Y",
126+
},
127+
want: &bq.JobConfigurationLoad{
128+
TimeZone: "America/Los_Angeles",
129+
TimestampFormat: "%a %b %e %I:%M:%S %Y",
130+
TimeFormat: "%I:%M:%S",
131+
DateFormat: "%A %b %e %Y",
132+
DatetimeFormat: "%a %b %e %I:%M:%S %Y",
133+
},
134+
},
116135
}
117136
for _, tc := range testcases {
118137
got := &bq.JobConfigurationLoad{}
@@ -158,7 +177,8 @@ func TestFileConfigPopulateExternalDataConfig(t *testing.T) {
158177
FieldDelimiter: "\t",
159178
Quote: &hyphen,
160179
SkipLeadingRows: 8,
161-
NullMarker: "marker",
180+
NullMarkers: []string{"marker"},
181+
SourceColumnMatch: "POSITION",
162182
PreserveAsciiControlCharacters: true,
163183
},
164184
},

0 commit comments

Comments
 (0)