Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1896,9 +1896,10 @@ def _temp_path():
generate_map_case(),

generate_non_canonical_map_case()
.skip_tester('Java') # TODO(ARROW-8715)
# .skip_tester('Java') # TODO(ARROW-8715)
# Canonical map names are restored on import, so the schemas are unequal
.skip_format(SKIP_C_SCHEMA, 'C++'),
.skip_format(SKIP_C_SCHEMA, 'C++')
.skip_format(SKIP_C_SCHEMA, 'Java'),

generate_nested_case(),

Expand Down
49 changes: 49 additions & 0 deletions java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,55 @@ public void testMapVector() {
}
}

/**
 * Builds a single-row map vector whose entry struct names its children "id" and "value"
 * (rather than the default key/value names), fills it with 18 entries, and hands it to the
 * {@code roundtrip} helper.
 */
@Test
public void testMapVectorCustomFieldNames() {
  // Entry struct children: a non-nullable int64 "id" and a nullable int64 "value".
  List<Field> entryChildren = new ArrayList<>();
  entryChildren.add(
      new Field(
          "id", FieldType.notNullable(new ArrowType.Int(64, true)), Collections.emptyList()));
  entryChildren.add(
      new Field(
          "value", FieldType.nullable(new ArrowType.Int(64, true)), Collections.emptyList()));

  Field entryField =
      new Field("entry", FieldType.notNullable(ArrowType.Struct.INSTANCE), entryChildren);
  Field mapField =
      new Field(
          "mapFieldIntToInt",
          FieldType.notNullable(new ArrowType.Map(false)),
          Collections.singletonList(entryField));
  Schema schema = new Schema(Collections.singletonList(mapField));

  try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
      MapVector mapVector = (MapVector) root.getVector("mapFieldIntToInt")) {
    UnionMapWriter writer = mapVector.getWriter();
    writer.setPosition(0);
    writer.startMap();
    for (int entry = 0; entry < 18; entry++) {
      writer.startEntry();
      if (entry % 2 != 0) {
        // Odd entries: both key and value are written as null.
        writer.key().bigInt().writeNull();
        writer.value().bigInt().writeNull();
      } else {
        // Even entries: key is the index; value is set only for multiples of three.
        writer.key().bigInt().writeBigInt(entry);
        if (entry % 3 != 0) {
          writer.value().bigInt().writeNull();
        } else {
          writer.value().bigInt().writeBigInt(entry * 7);
        }
      }
      writer.endEntry();
    }
    writer.endMap();

    writer.setValueCount(1);
    root.setRowCount(1);
    roundtrip(mapVector, MapVector.class);
  }
}

@Test
public void testUnionVector() {
final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
Expand Down
56 changes: 56 additions & 0 deletions java/c/src/test/java/org/apache/arrow/c/StreamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.compare.Range;
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.impl.UnionMapWriter;
import org.apache.arrow.vector.dictionary.Dictionary;
import org.apache.arrow.vector.dictionary.DictionaryProvider;
import org.apache.arrow.vector.ipc.ArrowReader;
Expand Down Expand Up @@ -263,6 +265,60 @@ public void roundtripDictionary() throws Exception {
}
}

/**
 * Builds a one-row map batch whose entry struct children are named "id" and "value", unloads
 * it into a record batch, and passes schema and batch to the {@code roundtrip} helper.
 */
@Test
public void roundtripMap() throws Exception {
  Field idField =
      new Field(
          "id", FieldType.notNullable(new ArrowType.Int(64, true)), Collections.emptyList());
  Field valField =
      new Field(
          "value", FieldType.nullable(new ArrowType.Int(64, true)), Collections.emptyList());
  List<Field> entryChildren = new ArrayList<>();
  entryChildren.add(idField);
  entryChildren.add(valField);

  Field entryStruct =
      new Field("entry", FieldType.notNullable(ArrowType.Struct.INSTANCE), entryChildren);
  FieldType mapType = FieldType.notNullable(new ArrowType.Map(false));
  Field mapField =
      new Field("mapFieldIntToInt", mapType, Collections.singletonList(entryStruct));

  Schema schema = new Schema(Collections.singletonList(mapField));
  final List<ArrowRecordBatch> batches = new ArrayList<>();

  try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
      MapVector mapVector = (MapVector) root.getVector("mapFieldIntToInt")) {
    UnionMapWriter writer = mapVector.getWriter();
    writer.setPosition(0);
    writer.startMap();
    int index = 0;
    while (index < 18) {
      writer.startEntry();
      final boolean even = index % 2 == 0;
      final boolean multipleOfThree = index % 3 == 0;
      if (even) {
        writer.key().bigInt().writeBigInt(index);
        if (multipleOfThree) {
          writer.value().bigInt().writeBigInt(index * 7);
        } else {
          writer.value().bigInt().writeNull();
        }
      } else {
        writer.key().bigInt().writeNull();
        writer.value().bigInt().writeNull();
      }
      writer.endEntry();
      index++;
    }
    writer.endMap();

    // Exactly one map row was written above.
    writer.setValueCount(1);
    root.setRowCount(1);

    batches.add(new VectorUnloader(root).getRecordBatch());
    roundtrip(schema, batches);
  }
}

@Test
public void importReleasedStream() {
try (final ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) {
Expand Down
31 changes: 29 additions & 2 deletions java/vector/src/main/codegen/templates/UnionMapWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,33 @@ public UnionMapWriter value() {
return this;
}

/**
 * Resolves the name of the entry-struct child to write to for the current {@code mode}.
 *
 * <p>The map vector's field is checked to be non-null and to have exactly one child (the
 * entry struct). In KEY mode the name of the struct's first child is returned, or
 * {@code MapVector.KEY_NAME} when the struct has no children. In VALUE mode the name of the
 * struct's second child is returned, or {@code MapVector.VALUE_NAME} when the struct has
 * fewer than two children.
 *
 * @return the child field name to use for the key or value writer
 * @throws UnsupportedOperationException if the mode is neither KEY nor VALUE
 */
private String getWriteFieldName() {
Field mapField = this.vector.getField();
Preconditions.checkNotNull(mapField, "MapVector does not have a field.");
Preconditions.checkArgument(mapField.getChildren().size() == 1,
"MapVector does not have a single struct field.");
Field structField = mapField.getChildren().get(0);
switch (mode) {
case KEY:
if (structField.getChildren().size() == 0) {
// key is not defined in the struct, use default name
return MapVector.KEY_NAME;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should only be if there are 0 children? If there's any other number of children presumably it should fail

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, so better to check size==0 and assign the KEY_NAME else, the field from the struct. Also for value, I think the most accurate check would be size < 2, because there could be no key defined, then there could be 1 child (when key was added).

} else {
return structField.getChildren().get(0).getName();
}
case VALUE:
if (structField.getChildren().size() < 2) {
// key may or may not have been defined in the struct, but
// value has not been defined.
return MapVector.VALUE_NAME;
} else {
return structField.getChildren().get(1).getName();
}
default:
throw new UnsupportedOperationException("Cannot get field name in OFF mode");
}
}

<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
Expand All @@ -149,9 +176,9 @@ public UnionMapWriter value() {
public ${name}Writer ${uncappedName}() {
switch (mode) {
case KEY:
return entryWriter.${uncappedName}(MapVector.KEY_NAME);
return entryWriter.${uncappedName}(getWriteFieldName());
case VALUE:
return entryWriter.${uncappedName}(MapVector.VALUE_NAME);
return entryWriter.${uncappedName}(getWriteFieldName());
default:
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,11 @@
*/
public class MapVector extends ListVector {

/** The default name of the key field in the MapVector. */
public static final String KEY_NAME = "key";
/** The default name of the value field in the MapVector. */
public static final String VALUE_NAME = "value";

public static final String DATA_VECTOR_NAME = "entries";

/**
Expand Down Expand Up @@ -74,6 +77,13 @@ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, Ca
defaultDataVectorName = DATA_VECTOR_NAME;
}

/**
* Construct a MapVector instance.
*
* @param field The field (name, type, and children) this vector is constructed from.
* @param allocator The allocator used for allocating/reallocating buffers.
* @param callBack A schema change callback.
*/
public MapVector(Field field, BufferAllocator allocator, CallBack callBack) {
super(field, allocator, callBack);
defaultDataVectorName = DATA_VECTOR_NAME;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand All @@ -39,6 +40,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.JsonStringArrayList;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
Expand Down Expand Up @@ -1204,4 +1206,50 @@ public void testMakeTransferPairPreserveNullability() {
assertEquals(intField, vec.getField().getChildren().get(0));
assertEquals(intField, res.getField().getChildren().get(0));
}

/**
 * Verifies that writing through {@link UnionMapWriter} keeps the custom key and value child
 * names declared in the schema ("myKey" / "myValue") instead of adding children under the
 * default key/value names.
 */
@Test
public void testValidateKeyValueFieldNames() {
  FieldType keyType = FieldType.notNullable(MinorType.BIGINT.getType());
  FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType());

  Field keyField = new Field("myKey", keyType, null);
  Field valueField = new Field("myValue", valueType, null);

  List<Field> structFields = Arrays.asList(keyField, valueField);

  Field structField =
      new Field("entry", FieldType.notNullable(ArrowType.Struct.INSTANCE), structFields);
  Field mapField =
      new Field(
          "mapField",
          FieldType.notNullable(new ArrowType.Map(false)),
          Collections.singletonList(structField));

  Schema schema = new Schema(Collections.singletonList(mapField));

  try (VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator);
      MapVector mapVector = (MapVector) vectorSchemaRoot.getVector("mapField")) {
    UnionMapWriter mapWriter = mapVector.getWriter();
    mapWriter.setPosition(0);
    mapWriter.startMap();
    for (int i = 0; i < 3; i++) {
      mapWriter.startEntry();
      mapWriter.key().bigInt().writeBigInt(i);
      // The value child is declared as FLOAT8, so write through the matching float8 writer
      // rather than a bigInt writer that does not agree with the declared type.
      mapWriter.value().float8().writeFloat8(i * 7.0);
      mapWriter.endEntry();
    }
    mapWriter.endMap();
    mapWriter.setValueCount(1);
    vectorSchemaRoot.setRowCount(1);

    FieldVector structVector = mapVector.getChildrenFromFields().get(0);

    // Check the child count first so an unexpected extra or missing child is reported
    // directly; expected value goes first per the JUnit assertEquals contract.
    assertEquals(2, structVector.getChildrenFromFields().size());

    FieldVector keyVector = structVector.getChildrenFromFields().get(0);
    FieldVector valueVector = structVector.getChildrenFromFields().get(1);

    assertEquals(keyField.getName(), keyVector.getField().getName());
    assertEquals(valueField.getName(), valueVector.getField().getName());
  }
}
}