Skip to content

Commit 6539fbf

Browse files
authored
Update bigquery dependency and add support for BYTES datatype (#1045)
* Update BigQuery dependency * Add support for BYTES data type
1 parent 3b35a9e commit 6539fbf

File tree

12 files changed

+226
-116
lines changed

12 files changed

+226
-116
lines changed

gcloud-java-bigquery/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
<dependency>
3232
<groupId>com.google.apis</groupId>
3333
<artifactId>google-api-services-bigquery</artifactId>
34-
<version>v2-rev270-1.21.0</version>
34+
<version>v2-rev303-1.22.0</version>
3535
<scope>compile</scope>
3636
<exclusions>
3737
<exclusion>

gcloud-java-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public final class CsvOptions extends FormatOptions {
3434
private final String encoding;
3535
private final String fieldDelimiter;
3636
private final String quote;
37-
private final Integer skipLeadingRows;
37+
private final Long skipLeadingRows;
3838

3939
public static final class Builder {
4040

@@ -43,18 +43,27 @@ public static final class Builder {
4343
private String encoding;
4444
private String fieldDelimiter;
4545
private String quote;
46-
private Integer skipLeadingRows;
46+
private Long skipLeadingRows;
4747

4848
private Builder() {}
4949

50+
private Builder(CsvOptions csvOptions) {
51+
this.allowJaggedRows = csvOptions.allowJaggedRows;
52+
this.allowQuotedNewLines = csvOptions.allowQuotedNewLines;
53+
this.encoding = csvOptions.encoding;
54+
this.fieldDelimiter = csvOptions.fieldDelimiter;
55+
this.quote = csvOptions.quote;
56+
this.skipLeadingRows = csvOptions.skipLeadingRows;
57+
}
58+
5059
/**
5160
* Set whether BigQuery should accept rows that are missing trailing optional columns. If
5261
* {@code true}, BigQuery treats missing trailing columns as null values. If {@code false},
5362
* records with missing trailing columns are treated as bad records, and if there are too many
5463
* bad records, an invalid error is returned in the job result. By default, rows with missing
5564
* trailing columns are considered bad records.
5665
*/
57-
public Builder allowJaggedRows(Boolean allowJaggedRows) {
66+
public Builder allowJaggedRows(boolean allowJaggedRows) {
5867
this.allowJaggedRows = allowJaggedRows;
5968
return this;
6069
}
@@ -63,7 +72,7 @@ public Builder allowJaggedRows(Boolean allowJaggedRows) {
6372
* Sets whether BigQuery should allow quoted data sections that contain newline characters in a
6473
* CSV file. By default, quoted newlines are not allowed.
6574
*/
66-
public Builder allowQuotedNewLines(Boolean allowQuotedNewLines) {
75+
public Builder allowQuotedNewLines(boolean allowQuotedNewLines) {
6776
this.allowQuotedNewLines = allowQuotedNewLines;
6877
return this;
6978
}
@@ -104,7 +113,7 @@ public Builder fieldDelimiter(String fieldDelimiter) {
104113
* string to ISO-8859-1 encoding, and then uses the first byte of the encoded string to split
105114
* the data in its raw, binary state. The default value is a double-quote ('"'). If your data
106115
* does not contain quoted sections, set the property value to an empty string. If your data
107-
* contains quoted newline characters, you must also set {@link #allowQuotedNewLines(Boolean)}
116+
* contains quoted newline characters, you must also set {@link #allowQuotedNewLines(boolean)}
108117
* property to {@code true}.
109118
*/
110119
public Builder quote(String quote) {
@@ -117,7 +126,7 @@ public Builder quote(String quote) {
117126
* data. The default value is 0. This property is useful if you have header rows in the file
118127
* that should be skipped.
119128
*/
120-
public Builder skipLeadingRows(Integer skipLeadingRows) {
129+
public Builder skipLeadingRows(long skipLeadingRows) {
121130
this.skipLeadingRows = skipLeadingRows;
122131
return this;
123132
}
@@ -186,21 +195,15 @@ public String quote() {
186195
* Returns the number of rows at the top of a CSV file that BigQuery will skip when reading the
187196
* data.
188197
*/
189-
public Integer skipLeadingRows() {
198+
public Long skipLeadingRows() {
190199
return skipLeadingRows;
191200
}
192201

193202
/**
194203
* Returns a builder for the {@code CsvOptions} object.
195204
*/
196205
public Builder toBuilder() {
197-
return new Builder()
198-
.allowJaggedRows(allowJaggedRows)
199-
.allowQuotedNewLines(allowQuotedNewLines)
200-
.encoding(encoding)
201-
.fieldDelimiter(fieldDelimiter)
202-
.quote(quote)
203-
.skipLeadingRows(skipLeadingRows);
206+
return new Builder(this);
204207
}
205208

206209
@Override

gcloud-java-bigquery/src/main/java/com/google/cloud/bigquery/Field.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public static class Type implements Serializable {
7373
private static final long serialVersionUID = 2841484762609576959L;
7474

7575
public enum Value {
76-
STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD
76+
BYTES, STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD
7777
}
7878

7979
private final Value value;
@@ -108,6 +108,13 @@ public List<Field> fields() {
108108
return fields;
109109
}
110110

111+
/**
112+
* Returns a {@link Value#BYTES} field value.
113+
*/
114+
public static Type bytes() {
115+
return new Type(Value.BYTES);
116+
}
117+
111118
/**
112119
* Returns a {@link Value#STRING} field value.
113120
*/

gcloud-java-bigquery/src/main/java/com/google/cloud/bigquery/FieldValue.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import com.google.common.base.Function;
2424
import com.google.common.base.MoreObjects;
2525
import com.google.common.collect.Lists;
26+
import com.google.common.io.BaseEncoding;
2627

2728
import java.io.Serializable;
2829
import java.util.List;
@@ -54,7 +55,7 @@ public FieldValue apply(Object pb) {
5455
public enum Attribute {
5556
/**
5657
* A primitive field value. A {@code FieldValue} is primitive when the corresponding field has
57-
* type {@link Field.Type#bool()}, {@link Field.Type#string()},
58+
* type {@link Field.Type#bytes()}, {@link Field.Type#bool()}, {@link Field.Type#string()},
5859
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
5960
* {@link Field.Type#timestamp()} or the value is set to {@code null}.
6061
*/
@@ -80,7 +81,7 @@ public enum Attribute {
8081
* Returns the attribute of this Field Value.
8182
*
8283
* @return {@link Attribute#PRIMITIVE} if the field is a primitive type
83-
* ({@link Field.Type#bool()}, {@link Field.Type#string()},
84+
* ({@link Field.Type#bytes()}, {@link Field.Type#bool()}, {@link Field.Type#string()},
8485
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
8586
* {@link Field.Type#timestamp()}) or is {@code null}. Returns {@link Attribute#REPEATED} if
8687
* the corresponding field has ({@link Field.Mode#REPEATED}) mode. Returns
@@ -108,8 +109,8 @@ public Object value() {
108109

109110
/**
110111
* Returns this field's value as a {@link String}. This method should only be used if the
111-
* corresponding field has primitive type ({@link Field.Type#bool()}, {@link Field.Type#string()},
112-
* {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
112+
* corresponding field has primitive type ({@link Field.Type#bytes()}, {@link Field.Type#bool()},
113+
* {@link Field.Type#string()}, {@link Field.Type#floatingPoint()}, {@link Field.Type#integer()},
113114
* {@link Field.Type#timestamp()}).
114115
*
115116
* @throws ClassCastException if the field is not a primitive type
@@ -121,6 +122,22 @@ public String stringValue() {
121122
return (String) value;
122123
}
123124

125+
/**
126+
* Returns this field's value as a byte array. This method should only be used if the
127+
* corresponding field has primitive type ({@link Field.Type#bytes()}).
128+
*
129+
* @throws ClassCastException if the field is not a primitive type
130+
* @throws NullPointerException if {@link #isNull()} returns {@code true}
131+
* @throws IllegalStateException if the field value is not encoded in base64
132+
*/
133+
public byte[] bytesValue() {
134+
try {
135+
return BaseEncoding.base64().decode(stringValue());
136+
} catch (IllegalArgumentException ex) {
137+
throw new IllegalStateException(ex);
138+
}
139+
}
140+
124141
/**
125142
* Returns this field's value as a {@code long}. This method should only be used if the
126143
* corresponding field has {@link Field.Type#integer()} type.

gcloud-java-bigquery/src/main/java/com/google/cloud/bigquery/InsertAllRequest.java

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ public final class InsertAllRequest implements Serializable {
4949

5050
/**
5151
* A Google BigQuery row to be inserted into a table. Each {@code RowToInsert} has an associated
52-
* id used by BigQuery to detect duplicate insertion requests on a best-effort basis.
52+
* id used by BigQuery to detect duplicate insertion requests on a best-effort basis. Please
53+
* note that data for fields of type {@link Field.Type#bytes()} must be provided as a base64
54+
* encoded string.
5355
*
5456
* <p>Example usage of creating a row to insert:
5557
* <pre> {@code
@@ -58,8 +60,9 @@ public final class InsertAllRequest implements Serializable {
5860
* recordContent.put("subfieldName1", "value");
5961
* recordContent.put("subfieldName2", repeatedFieldValue);
6062
* Map<String, Object> rowContent = new HashMap<String, Object>();
61-
* rowContent.put("fieldName1", true);
62-
* rowContent.put("fieldName2", recordContent);
63+
* rowContent.put("booleanFieldName", true);
64+
* rowContent.put("bytesFieldName", "DQ4KDQ==");
65+
* rowContent.put("recordFieldName", recordContent);
6366
* RowToInsert row = new RowToInsert("rowId", rowContent);
6467
* }</pre>
6568
*
@@ -116,7 +119,8 @@ public boolean equals(Object obj) {
116119
}
117120

118121
/**
119-
* Creates a row to be inserted with associated id.
122+
* Creates a row to be inserted with associated id. Please note that data for fields of type
123+
* {@link Field.Type#bytes()} must be provided as a base64 encoded string.
120124
*
121125
* @param id id of the row, used to identify duplicates
122126
* @param content the actual content of the row
@@ -126,7 +130,8 @@ public static RowToInsert of(String id, Map<String, Object> content) {
126130
}
127131

128132
/**
129-
* Creates a row to be inserted without associated id.
133+
* Creates a row to be inserted without associated id. Please note that data for fields of
134+
* type {@link Field.Type#bytes()} must be provided as a base64 encoded string.
130135
*
131136
* @param content the actual content of the row
132137
*/
@@ -174,7 +179,8 @@ public Builder addRow(RowToInsert rowToInsert) {
174179
}
175180

176181
/**
177-
* Adds a row to be inserted with associated id.
182+
* Adds a row to be inserted with associated id. Please note that data for fields of type
183+
* {@link Field.Type#bytes()} must be provided as a base64 encoded string.
178184
*
179185
* <p>Example usage of adding a row with associated id:
180186
* <pre> {@code
@@ -184,8 +190,9 @@ public Builder addRow(RowToInsert rowToInsert) {
184190
* recordContent.put("subfieldName1", "value");
185191
* recordContent.put("subfieldName2", repeatedFieldValue);
186192
* Map<String, Object> rowContent = new HashMap<String, Object>();
187-
* rowContent.put("fieldName1", true);
188-
* rowContent.put("fieldName2", recordContent);
193+
* rowContent.put("booleanFieldName", true);
194+
* rowContent.put("bytesFieldName", "DQ4KDQ==");
195+
* rowContent.put("recordFieldName", recordContent);
189196
* builder.addRow("rowId", rowContent);
190197
* }</pre>
191198
*/
@@ -195,7 +202,8 @@ public Builder addRow(String id, Map<String, Object> content) {
195202
}
196203

197204
/**
198-
* Adds a row to be inserted without an associated id.
205+
* Adds a row to be inserted without an associated id. Please note that data for fields of
206+
* type {@link Field.Type#bytes()} must be provided as a base64 encoded string.
199207
*
200208
* <p>Example usage of adding a row without an associated id:
201209
* <pre> {@code
@@ -205,8 +213,9 @@ public Builder addRow(String id, Map<String, Object> content) {
205213
* recordContent.put("subfieldName1", "value");
206214
* recordContent.put("subfieldName2", repeatedFieldValue);
207215
* Map<String, Object> rowContent = new HashMap<String, Object>();
208-
* rowContent.put("fieldName1", true);
209-
* rowContent.put("fieldName2", recordContent);
216+
* rowContent.put("booleanFieldName", true);
217+
* rowContent.put("bytesFieldName", "DQ4KDQ==");
218+
* rowContent.put("recordFieldName", recordContent);
210219
* builder.addRow(rowContent);
211220
* }</pre>
212221
*/

gcloud-java-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import com.google.api.services.bigquery.model.JobConfigurationLoad;
2222
import com.google.common.base.MoreObjects.ToStringHelper;
2323
import com.google.common.collect.ImmutableList;
24+
import com.google.common.primitives.Ints;
2425

2526
import java.util.List;
2627
import java.util.Objects;
@@ -97,12 +98,18 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
9798
|| loadConfigurationPb.getQuote() != null
9899
|| loadConfigurationPb.getSkipLeadingRows() != null) {
99100
CsvOptions.Builder builder = CsvOptions.builder()
100-
.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows())
101-
.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines())
102101
.encoding(loadConfigurationPb.getEncoding())
103102
.fieldDelimiter(loadConfigurationPb.getFieldDelimiter())
104-
.quote(loadConfigurationPb.getQuote())
105-
.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
103+
.quote(loadConfigurationPb.getQuote());
104+
if (loadConfigurationPb.getAllowJaggedRows() != null) {
105+
builder.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows());
106+
}
107+
if (loadConfigurationPb.getAllowQuotedNewlines() != null) {
108+
builder.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines());
109+
}
110+
if (loadConfigurationPb.getSkipLeadingRows() != null) {
111+
builder.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
112+
}
106113
this.formatOptions = builder.build();
107114
}
108115
this.maxBadRecords = loadConfigurationPb.getMaxBadRecords();
@@ -300,8 +307,11 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
300307
.setAllowJaggedRows(csvOptions.allowJaggedRows())
301308
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
302309
.setEncoding(csvOptions.encoding())
303-
.setQuote(csvOptions.quote())
304-
.setSkipLeadingRows(csvOptions.skipLeadingRows());
310+
.setQuote(csvOptions.quote());
311+
if (csvOptions.skipLeadingRows() != null) {
312+
// todo(mziccard) remove checked cast or comment when #1044 is closed
313+
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.skipLeadingRows()));
314+
}
305315
}
306316
if (schema != null) {
307317
loadConfigurationPb.setSchema(schema.toPb());

gcloud-java-bigquery/src/main/java/com/google/cloud/bigquery/WriteChannelConfiguration.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
2424
import com.google.common.base.MoreObjects;
2525
import com.google.common.collect.ImmutableList;
26+
import com.google.common.primitives.Ints;
2627

2728
import java.io.Serializable;
2829
import java.util.List;
@@ -90,12 +91,18 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
9091
|| loadConfigurationPb.getQuote() != null
9192
|| loadConfigurationPb.getSkipLeadingRows() != null) {
9293
CsvOptions.Builder builder = CsvOptions.builder()
93-
.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows())
94-
.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines())
9594
.encoding(loadConfigurationPb.getEncoding())
9695
.fieldDelimiter(loadConfigurationPb.getFieldDelimiter())
97-
.quote(loadConfigurationPb.getQuote())
98-
.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
96+
.quote(loadConfigurationPb.getQuote());
97+
if (loadConfigurationPb.getAllowJaggedRows() != null) {
98+
builder.allowJaggedRows(loadConfigurationPb.getAllowJaggedRows());
99+
}
100+
if (loadConfigurationPb.getAllowQuotedNewlines() != null) {
101+
builder.allowQuotedNewLines(loadConfigurationPb.getAllowQuotedNewlines());
102+
}
103+
if (loadConfigurationPb.getSkipLeadingRows() != null) {
104+
builder.skipLeadingRows(loadConfigurationPb.getSkipLeadingRows());
105+
}
99106
this.formatOptions = builder.build();
100107
}
101108
this.maxBadRecords = loadConfigurationPb.getMaxBadRecords();
@@ -271,8 +278,11 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
271278
.setAllowJaggedRows(csvOptions.allowJaggedRows())
272279
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
273280
.setEncoding(csvOptions.encoding())
274-
.setQuote(csvOptions.quote())
275-
.setSkipLeadingRows(csvOptions.skipLeadingRows());
281+
.setQuote(csvOptions.quote());
282+
if (csvOptions.skipLeadingRows() != null) {
283+
// todo(mziccard) remove checked cast or comment when #1044 is closed
284+
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.skipLeadingRows()));
285+
}
276286
}
277287
if (schema != null) {
278288
loadConfigurationPb.setSchema(schema.toPb());

gcloud-java-bigquery/src/test/java/com/google/cloud/bigquery/CsvOptionsTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public class CsvOptionsTest {
3030
private static final Charset ENCODING = StandardCharsets.UTF_8;
3131
private static final String FIELD_DELIMITER = ",";
3232
private static final String QUOTE = "\"";
33-
private static final Integer SKIP_LEADING_ROWS = 42;
33+
private static final long SKIP_LEADING_ROWS = 42L;
3434
private static final CsvOptions CSV_OPTIONS = CsvOptions.builder()
3535
.allowJaggedRows(ALLOW_JAGGED_ROWS)
3636
.allowQuotedNewLines(ALLOW_QUOTED_NEWLINE)
@@ -65,7 +65,7 @@ public void testBuilder() {
6565
assertEquals(ENCODING.name(), CSV_OPTIONS.encoding());
6666
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.fieldDelimiter());
6767
assertEquals(QUOTE, CSV_OPTIONS.quote());
68-
assertEquals(SKIP_LEADING_ROWS, CSV_OPTIONS.skipLeadingRows());
68+
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.skipLeadingRows());
6969
}
7070

7171
@Test

0 commit comments

Comments
 (0)