Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions bigquery/external.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,27 @@ type ExternalDataConfig struct {
// Metadata Cache Mode for the table. Set this to
// enable caching of metadata from external data source.
MetadataCacheMode MetadataCacheMode

// Time zone used when parsing timestamp values that do not
// have specific time zone information (e.g. 2024-04-20 12:34:56).
// The expected format is an IANA time zone string (e.g. America/Los_Angeles).
TimeZone string

// Format used to parse DATE values. Supports C-style and
// SQL-style values.
DateFormat string

// Format used to parse DATETIME values. Supports
// C-style and SQL-style values.
DatetimeFormat string

// Format used to parse TIME values. Supports C-style and
// SQL-style values.
TimeFormat string

// Format used to parse TIMESTAMP values. Supports
// C-style and SQL-style values.
TimestampFormat string
}

func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
Expand All @@ -147,6 +168,11 @@ func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
ConnectionId: e.ConnectionID,
ReferenceFileSchemaUri: e.ReferenceFileSchemaURI,
MetadataCacheMode: string(e.MetadataCacheMode),
TimeZone: e.TimeZone,
DateFormat: e.DateFormat,
DatetimeFormat: e.DatetimeFormat,
TimeFormat: e.TimeFormat,
TimestampFormat: e.TimestampFormat,
}
if e.Schema != nil {
q.Schema = e.Schema.toBQ()
Expand All @@ -173,6 +199,11 @@ func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfi
ConnectionID: q.ConnectionId,
ReferenceFileSchemaURI: q.ReferenceFileSchemaUri,
MetadataCacheMode: MetadataCacheMode(q.MetadataCacheMode),
TimeZone: q.TimeZone,
TimestampFormat: q.TimestampFormat,
TimeFormat: q.TimeFormat,
DateFormat: q.DateFormat,
DatetimeFormat: q.DatetimeFormat,
}
for _, v := range q.DecimalTargetTypes {
e.DecimalTargetTypes = append(e.DecimalTargetTypes, DecimalTargetType(v))
Expand Down Expand Up @@ -257,11 +288,26 @@ type CSVOptions struct {

// An optional custom string that will represent a NULL
// value in CSV import data.
//
// NullMarker and NullMarkers are mutually exclusive and should not be set at the same time.
NullMarker string

// An optional list of custom strings that will represent
// a NULL value in CSV import data.
//
// NullMarker and NullMarkers are mutually exclusive and should not be set at the same time.
NullMarkers []string

// Preserves the embedded ASCII control characters (the first 32 characters in the ASCII table,
// from '\x00' to '\x1F') when loading from CSV. Only applicable to CSV; ignored for other formats.
PreserveASCIIControlCharacters bool

// SourceColumnMatch controls the strategy used to match loaded columns to the schema.
// If not set, a sensible default is chosen based on how the schema is provided. If
// autodetect is used, then columns are matched by name. Otherwise, columns
// are matched by position. This is done to keep the behavior
// backward-compatible.
SourceColumnMatch SourceColumnMatch
}

func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) {
Expand All @@ -273,6 +319,8 @@ func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration)
Quote: o.quote(),
SkipLeadingRows: o.SkipLeadingRows,
NullMarker: o.NullMarker,
NullMarkers: o.NullMarkers,
SourceColumnMatch: string(o.SourceColumnMatch),
PreserveAsciiControlCharacters: o.PreserveASCIIControlCharacters,
}
}
Expand Down Expand Up @@ -306,6 +354,8 @@ func bqToCSVOptions(q *bq.CsvOptions) *CSVOptions {
FieldDelimiter: q.FieldDelimiter,
SkipLeadingRows: q.SkipLeadingRows,
NullMarker: q.NullMarker,
NullMarkers: q.NullMarkers,
SourceColumnMatch: SourceColumnMatch(q.SourceColumnMatch),
PreserveASCIIControlCharacters: q.PreserveAsciiControlCharacters,
}
o.setQuote(q.Quote)
Expand Down
10 changes: 9 additions & 1 deletion bigquery/external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ func TestExternalDataConfig(t *testing.T) {
FieldDelimiter: "f",
Quote: "q",
SkipLeadingRows: 3,
NullMarker: "marker",
NullMarkers: []string{"marker"},
SourceColumnMatch: SourceColumnMatchPosition,
},
ConnectionID: "connection",
},
Expand Down Expand Up @@ -103,6 +104,13 @@ func TestExternalDataConfig(t *testing.T) {
SourceFormat: JSON,
MetadataCacheMode: Automatic,
},
{
TimeZone: "America/Los_Angeles",
TimestampFormat: "%a %b %e %I:%M:%S %Y",
TimeFormat: "%I:%M:%S",
DateFormat: "%A %b %e %Y",
DatetimeFormat: "%a %b %e %I:%M:%S %Y",
},
} {
q := want.toBQ()
got, err := bqToExternalDataConfig(&q)
Expand Down
53 changes: 53 additions & 0 deletions bigquery/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,27 @@ type FileConfig struct {

// Additional options for Avro files.
AvroOptions *AvroOptions

// Time zone used when parsing timestamp values that do not
// have specific time zone information (e.g. 2024-04-20 12:34:56).
// The expected format is an IANA time zone string (e.g. America/Los_Angeles).
TimeZone string

// Format used to parse DATE values. Supports C-style and
// SQL-style values.
DateFormat string

// Format used to parse DATETIME values. Supports
// C-style and SQL-style values.
DatetimeFormat string

// Format used to parse TIME values. Supports C-style and
// SQL-style values.
TimeFormat string

// Format used to parse TIMESTAMP values. Supports
// C-style and SQL-style values.
TimestampFormat string
}

func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
Expand All @@ -93,6 +114,8 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
conf.MaxBadRecords = fc.MaxBadRecords
conf.NullMarker = fc.NullMarker
conf.NullMarkers = fc.NullMarkers
conf.SourceColumnMatch = string(fc.SourceColumnMatch)
conf.PreserveAsciiControlCharacters = fc.PreserveASCIIControlCharacters
if fc.Schema != nil {
conf.Schema = fc.Schema.toBQ()
Expand All @@ -107,6 +130,11 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
conf.UseAvroLogicalTypes = fc.AvroOptions.UseAvroLogicalTypes
}
conf.Quote = fc.quote()
conf.TimeZone = fc.TimeZone
conf.TimeFormat = fc.TimeFormat
conf.TimestampFormat = fc.TimestampFormat
conf.DatetimeFormat = fc.DatetimeFormat
conf.DateFormat = fc.DateFormat
}

func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
Expand All @@ -120,7 +148,14 @@ func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
fc.Encoding = Encoding(conf.Encoding)
fc.FieldDelimiter = conf.FieldDelimiter
fc.TimeZone = conf.TimeZone
fc.TimeFormat = conf.TimeFormat
fc.TimestampFormat = conf.TimestampFormat
fc.DatetimeFormat = conf.DatetimeFormat
fc.DateFormat = conf.DateFormat
fc.CSVOptions.NullMarker = conf.NullMarker
fc.CSVOptions.NullMarkers = conf.NullMarkers
fc.CSVOptions.SourceColumnMatch = SourceColumnMatch(conf.SourceColumnMatch)
fc.CSVOptions.PreserveASCIIControlCharacters = conf.PreserveAsciiControlCharacters
fc.CSVOptions.setQuote(conf.Quote)
}
Expand Down Expand Up @@ -165,3 +200,21 @@ const (
// ISO_8859_1 specifies the ISO-8859-1 encoding type.
ISO_8859_1 Encoding = "ISO-8859-1"
)

// SourceColumnMatch names the strategy used to match loaded columns to the schema.
type SourceColumnMatch string

const (
	// SourceColumnMatchUnspecified keeps the default behavior, which is to
	// pick a sensible default based on how the schema is provided: if
	// autodetect is used, columns are matched by name; otherwise, columns
	// are matched by position. This keeps the behavior backward-compatible.
	SourceColumnMatchUnspecified SourceColumnMatch = "SOURCE_COLUMN_MATCH_UNSPECIFIED"

	// SourceColumnMatchPosition matches columns by position. This assumes
	// that the columns are ordered the same way as the schema.
	SourceColumnMatchPosition SourceColumnMatch = "POSITION"

	// SourceColumnMatchName matches columns by name. The header row is read
	// as column names, and columns are reordered to match the field names
	// in the schema.
	SourceColumnMatchName SourceColumnMatch = "NAME"
)
26 changes: 23 additions & 3 deletions bigquery/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ var (
AllowJaggedRows: true,
AllowQuotedNewlines: true,
Encoding: UTF_8,
NullMarker: "marker",
NullMarkers: []string{"marker"},
PreserveASCIIControlCharacters: true,
SourceColumnMatch: SourceColumnMatchPosition,
},
}
)
Expand Down Expand Up @@ -73,8 +74,9 @@ func TestFileConfigPopulateLoadConfig(t *testing.T) {
Encoding: "UTF-8",
MaxBadRecords: 7,
IgnoreUnknownValues: true,
NullMarker: "marker",
NullMarkers: []string{"marker"},
PreserveAsciiControlCharacters: true,
SourceColumnMatch: "POSITION",
Schema: &bq.TableSchema{
Fields: []*bq.TableFieldSchema{
bqStringFieldSchema(),
Expand Down Expand Up @@ -113,6 +115,23 @@ func TestFileConfigPopulateLoadConfig(t *testing.T) {
UseAvroLogicalTypes: true,
},
},
{
description: "Custom date/datetime/time/timestamp formats",
fileConfig: &FileConfig{
TimeZone: "America/Los_Angeles",
TimestampFormat: "%a %b %e %I:%M:%S %Y",
TimeFormat: "%I:%M:%S",
DateFormat: "%A %b %e %Y",
DatetimeFormat: "%a %b %e %I:%M:%S %Y",
},
want: &bq.JobConfigurationLoad{
TimeZone: "America/Los_Angeles",
TimestampFormat: "%a %b %e %I:%M:%S %Y",
TimeFormat: "%I:%M:%S",
DateFormat: "%A %b %e %Y",
DatetimeFormat: "%a %b %e %I:%M:%S %Y",
},
},
}
for _, tc := range testcases {
got := &bq.JobConfigurationLoad{}
Expand Down Expand Up @@ -158,7 +177,8 @@ func TestFileConfigPopulateExternalDataConfig(t *testing.T) {
FieldDelimiter: "\t",
Quote: &hyphen,
SkipLeadingRows: 8,
NullMarker: "marker",
NullMarkers: []string{"marker"},
SourceColumnMatch: "POSITION",
PreserveAsciiControlCharacters: true,
},
},
Expand Down
Loading