Skip to content
4 changes: 4 additions & 0 deletions .palantir/revapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1254,6 +1254,10 @@ acceptedBreaks:
new: "method void org.apache.iceberg.data.parquet.BaseParquetWriter<T>::<init>()"
justification: "Changing deprecated code"
"1.9.0":
org.apache.iceberg:iceberg-api:
- code: "java.method.addedToInterface"
new: "method boolean org.apache.iceberg.Accessor<T>::hasOptionalFieldInPath()"
justification: "All subclasses implement the method"
org.apache.iceberg:iceberg-common:
- code: "java.method.visibilityReduced"
old: "method <R> R org.apache.iceberg.common.DynConstructors.Ctor<C>::invokeChecked(java.lang.Object,\
Expand Down
3 changes: 3 additions & 0 deletions api/src/main/java/org/apache/iceberg/Accessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ public interface Accessor<T> extends Serializable {
Object get(T container);

Type type();

/**
 * Returns true if the current field or any ancestor in the access path is optional.
 *
 * @return true when the accessed field itself, or at least one field enclosing it on the path
 *     from the container root, is optional; false when every field on the path is required
 */
boolean hasOptionalFieldInPath();
}
50 changes: 39 additions & 11 deletions api/src/main/java/org/apache/iceberg/Accessors.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ private static class PositionAccessor implements Accessor<StructLike> {
private final int position;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

PositionAccessor(int pos, Type type) {
PositionAccessor(int pos, Type type, boolean isOptional) {
this.position = pos;
this.type = type;
this.javaClass = type.typeId().javaClass();
this.hasOptionalFieldInPath = isOptional;
}

@Override
Expand All @@ -84,6 +86,11 @@ public Class<?> javaClass() {
return javaClass;
}

// Single-position accessor: there is no ancestor on the path, so this is exactly the
// optionality flag handed to the constructor for the accessed field.
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + position + "], type=" + type + ")";
Expand All @@ -95,12 +102,14 @@ private static class Position2Accessor implements Accessor<StructLike> {
private final int p1;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

Position2Accessor(int pos, PositionAccessor wrapped) {
Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.position();
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -117,6 +126,11 @@ public Class<?> javaClass() {
return javaClass;
}

// Precomputed in the constructor: true if the outer field at p0 is optional or the wrapped
// single-position accessor already reported an optional field.
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")";
Expand All @@ -129,13 +143,15 @@ private static class Position3Accessor implements Accessor<StructLike> {
private final int p2;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

Position3Accessor(int pos, Position2Accessor wrapped) {
Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.p0;
this.p2 = wrapped.p1;
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -148,6 +164,11 @@ public Type type() {
return type;
}

// Precomputed in the constructor: true if the outermost field at p0 is optional or the wrapped
// two-position accessor already reported an optional field on its path.
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type=" + type + ")";
Expand All @@ -157,10 +178,12 @@ public String toString() {
private static class WrappedPositionAccessor implements Accessor<StructLike> {
private final int position;
private final Accessor<StructLike> accessor;
private final boolean hasOptionalFieldInPath;

WrappedPositionAccessor(int pos, Accessor<StructLike> accessor) {
WrappedPositionAccessor(int pos, Accessor<StructLike> accessor, boolean isOptional) {
this.position = pos;
this.accessor = accessor;
this.hasOptionalFieldInPath = isOptional || accessor.hasOptionalFieldInPath();
}

@Override
Expand All @@ -177,27 +200,32 @@ public Type type() {
return accessor.type();
}

// Precomputed in the constructor: true if the field at this wrapper's position is optional or
// the wrapped accessor reports an optional field anywhere on its own path.
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "WrappedAccessor(position=" + position + ", wrapped=" + accessor + ")";
}
}

private static Accessor<StructLike> newAccessor(int pos, Type type) {
return new PositionAccessor(pos, type);
private static Accessor<StructLike> newAccessor(int pos, boolean isOptional, Type type) {
return new PositionAccessor(pos, type, isOptional);
}

private static Accessor<StructLike> newAccessor(
int pos, boolean isOptional, Accessor<StructLike> accessor) {
if (isOptional) {
// the wrapped position handles null layers
return new WrappedPositionAccessor(pos, accessor);
return new WrappedPositionAccessor(pos, accessor, isOptional);
} else if (accessor.getClass() == PositionAccessor.class) {
return new Position2Accessor(pos, (PositionAccessor) accessor);
return new Position2Accessor(pos, (PositionAccessor) accessor, isOptional);
} else if (accessor instanceof Position2Accessor) {
return new Position3Accessor(pos, (Position2Accessor) accessor);
return new Position3Accessor(pos, (Position2Accessor) accessor, isOptional);
} else {
return new WrappedPositionAccessor(pos, accessor);
return new WrappedPositionAccessor(pos, accessor, isOptional);
}
}

Expand Down Expand Up @@ -226,7 +254,7 @@ public Map<Integer, Accessor<StructLike>> struct(
}

// Add an accessor for this field as an Object (may or may not be primitive).
accessors.put(field.fieldId(), newAccessor(i, field.type()));
accessors.put(field.fieldId(), newAccessor(i, field.isOptional(), field.type()));
}

return accessors;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public Type type() {

@Override
public boolean producesNull() {
return field.isOptional();
// A field can evaluate to null if it is optional itself or if any ancestor on its access
// path is optional, even when the leaf field is declared required.
return accessor.hasOptionalFieldInPath();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.expressions;

import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import org.apache.iceberg.Accessor;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.types.Types;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class TestBoundReference {
  /**
   * Builds a schema holding one chain of nested structs that ends in an int leaf:
   *
   * <pre>s1: struct(s2: struct(... struct(leaf: int)))</pre>
   *
   * <p>Entry {@code i} of {@code optionalList} controls the field with ID {@code i + 1}: that
   * field is optional when the entry is true and required when it is false. The last entry
   * describes the leaf; all earlier entries describe the enclosing structs {@code s1},
   * {@code s2}, ... from the outermost inwards. A single-entry list yields just the leaf.
   */
  private static Schema buildSchemaFromOptionalList(List<Boolean> optionalList, String leafName) {
    if (optionalList == null || optionalList.isEmpty()) {
      throw new IllegalArgumentException("optionalList must not be empty");
    }

    int depth = optionalList.size();

    Types.NestedField innermost;
    if (optionalList.get(depth - 1)) {
      innermost = optional(depth, leafName, Types.IntegerType.get());
    } else {
      innermost = required(depth, leafName, Types.IntegerType.get());
    }

    Types.StructType struct = Types.StructType.of(innermost);

    // Wrap from the inside out; the struct with ID k is named "s<k>" and is governed by
    // optionalList.get(k - 1).
    for (int fieldId = depth - 1; fieldId >= 1; fieldId--) {
      String structName = "s" + fieldId;
      Types.NestedField wrapper;
      if (optionalList.get(fieldId - 1)) {
        wrapper = optional(fieldId, structName, struct);
      } else {
        wrapper = required(fieldId, structName, struct);
      }

      struct = Types.StructType.of(wrapper);
    }

    return new Schema(struct.fields());
  }

  /**
   * Supplies (optionality flags from outermost field to leaf, expected producesNull) pairs. The
   * expectation is true whenever at least one flag on the path is true.
   */
  private static Stream<Arguments> producesNullCases() {
    return Stream.of(
        // basic fields, no struct levels
        Arguments.of(Arrays.asList(false), false),
        Arguments.of(Arrays.asList(true), true),
        // one level
        Arguments.of(Arrays.asList(false, false), false),
        Arguments.of(Arrays.asList(false, true), true),
        Arguments.of(Arrays.asList(true, false), true),
        // two levels
        Arguments.of(Arrays.asList(false, false, false), false),
        Arguments.of(Arrays.asList(false, false, true), true),
        Arguments.of(Arrays.asList(true, false, false), true),
        Arguments.of(Arrays.asList(false, true, false), true),
        // three levels
        Arguments.of(Arrays.asList(false, false, false, false), false),
        Arguments.of(Arrays.asList(false, false, false, true), true),
        Arguments.of(Arrays.asList(true, false, false, false), true),
        Arguments.of(Arrays.asList(false, true, false, false), true),
        // four levels
        Arguments.of(Arrays.asList(false, false, false, false, false), false),
        Arguments.of(Arrays.asList(false, false, false, false, true), true),
        Arguments.of(Arrays.asList(true, false, false, false, false), true),
        Arguments.of(Arrays.asList(false, true, true, true, false), true));
  }

  @ParameterizedTest
  @MethodSource("producesNullCases")
  public void testProducesNull(List<Boolean> optionalList, boolean expectedProducesNull) {
    String leafName = "leaf";
    Schema schema = buildSchemaFromOptionalList(optionalList, leafName);

    // IDs are assigned 1..n from the outermost field inwards, so the leaf carries ID n.
    int leafId = optionalList.size();
    Accessor<StructLike> accessor = schema.accessorForField(leafId);
    BoundReference<Integer> ref =
        new BoundReference<>(schema.findField(leafId), accessor, leafName);

    assertThat(ref.producesNull()).isEqualTo(expectedProducesNull);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,7 @@ public void testNotNull() {
shouldRead =
new ParquetBloomRowGroupFilter(SCHEMA, notNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should read: this field is required and are always not-null")
.isTrue();
assertThat(shouldRead).as("Should read: bloom filter doesn't help").isTrue();
}

@Test
Expand All @@ -323,8 +321,8 @@ public void testIsNull() {
new ParquetBloomRowGroupFilter(SCHEMA, isNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should skip: this field is required and are always not-null")
.isFalse();
.as("Should read: required nested field can still be null if any ancestor is optional")
.isTrue();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,28 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() {
ImmutableList.of(row(4, Double.NEGATIVE_INFINITY)));
}

@TestTemplate
public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() {
  // "address" is a nullable struct whose only member, "street", is declared NOT NULL.
  sql(
      "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>)"
          + "USING iceberg ",
      tableName);
  configurePlanningMode(planningMode);

  // Row 0 has no address at all; row 1 has a populated address.
  sql("INSERT INTO %s VALUES (0, NULL)", tableName);
  sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName);

  // With a null parent struct, the required nested field must still match IS NULL.
  List<Object[]> rowsWithoutStreet = ImmutableList.of(row(0, null));
  checkOnlyIcebergFilters(
      "address.street is null" /* query predicate */,
      "address.street IS NULL" /* Iceberg scan filters */,
      rowsWithoutStreet);

  // Only the row whose struct is present satisfies IS NOT NULL.
  List<Object[]> rowsWithStreet = ImmutableList.of(row(1, row("123 Main St")));
  checkOnlyIcebergFilters(
      "address.street is not null" /* query predicate */,
      "address.street IS NOT NULL" /* Iceberg scan filters */,
      rowsWithStreet);
}

private void checkOnlyIcebergFilters(
String predicate, String icebergFilters, List<Object[]> expectedRows) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,28 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() {
ImmutableList.of(row(4, Double.NEGATIVE_INFINITY)));
}

@TestTemplate
public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() {
  // "address" is a nullable struct whose only member, "street", is declared NOT NULL.
  sql(
      "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>)"
          + "USING iceberg ",
      tableName);
  configurePlanningMode(planningMode);

  // Row 0 has no address at all; row 1 has a populated address.
  sql("INSERT INTO %s VALUES (0, NULL)", tableName);
  sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName);

  // With a null parent struct, the required nested field must still match IS NULL.
  List<Object[]> rowsWithoutStreet = ImmutableList.of(row(0, null));
  checkOnlyIcebergFilters(
      "address.street is null" /* query predicate */,
      "address.street IS NULL" /* Iceberg scan filters */,
      rowsWithoutStreet);

  // Only the row whose struct is present satisfies IS NOT NULL.
  List<Object[]> rowsWithStreet = ImmutableList.of(row(1, row("123 Main St")));
  checkOnlyIcebergFilters(
      "address.street is not null" /* query predicate */,
      "address.street IS NOT NULL" /* Iceberg scan filters */,
      rowsWithStreet);
}

private void checkOnlyIcebergFilters(
String predicate, String icebergFilters, List<Object[]> expectedRows) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,28 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() {
ImmutableList.of(row(4, Double.NEGATIVE_INFINITY)));
}

@TestTemplate
public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() {
  // "address" is a nullable struct whose only member, "street", is declared NOT NULL.
  sql(
      "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>)"
          + "USING iceberg ",
      tableName);
  configurePlanningMode(planningMode);

  // Row 0 has no address at all; row 1 has a populated address.
  sql("INSERT INTO %s VALUES (0, NULL)", tableName);
  sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName);

  // With a null parent struct, the required nested field must still match IS NULL.
  List<Object[]> rowsWithoutStreet = ImmutableList.of(row(0, null));
  checkOnlyIcebergFilters(
      "address.street is null" /* query predicate */,
      "address.street IS NULL" /* Iceberg scan filters */,
      rowsWithoutStreet);

  // Only the row whose struct is present satisfies IS NOT NULL.
  List<Object[]> rowsWithStreet = ImmutableList.of(row(1, row("123 Main St")));
  checkOnlyIcebergFilters(
      "address.street is not null" /* query predicate */,
      "address.street IS NOT NULL" /* Iceberg scan filters */,
      rowsWithStreet);
}

private void checkOnlyIcebergFilters(
String predicate, String icebergFilters, List<Object[]> expectedRows) {

Expand Down