Skip to content
4 changes: 4 additions & 0 deletions .palantir/revapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1254,6 +1254,10 @@ acceptedBreaks:
new: "method void org.apache.iceberg.data.parquet.BaseParquetWriter<T>::<init>()"
justification: "Changing deprecated code"
"1.9.0":
org.apache.iceberg:iceberg-api:
Comment thread
dejangvozdenac marked this conversation as resolved.
Outdated
- code: "java.method.addedToInterface"
new: "method boolean org.apache.iceberg.Accessor<T>::hasOptionalFieldInPath()"
justification: "All subclasses implement the method"
org.apache.iceberg:iceberg-common:
- code: "java.method.visibilityReduced"
old: "method <R> R org.apache.iceberg.common.DynConstructors.Ctor<C>::invokeChecked(java.lang.Object,\
Expand Down
3 changes: 3 additions & 0 deletions api/src/main/java/org/apache/iceberg/Accessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ public interface Accessor<T> extends Serializable {
Object get(T container);

Type type();

/** Returns true if the current field or any ancestor in the access path is optional. */
boolean hasOptionalFieldInPath();
Comment thread
dejangvozdenac marked this conversation as resolved.
Outdated
}
50 changes: 39 additions & 11 deletions api/src/main/java/org/apache/iceberg/Accessors.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ private static class PositionAccessor implements Accessor<StructLike> {
private final int position;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

PositionAccessor(int pos, Type type) {
PositionAccessor(int pos, Type type, boolean isOptional) {
this.position = pos;
this.type = type;
this.javaClass = type.typeId().javaClass();
this.hasOptionalFieldInPath = isOptional;
}

@Override
Expand All @@ -84,6 +86,11 @@ public Class<?> javaClass() {
return javaClass;
}

@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + position + "], type=" + type + ")";
Expand All @@ -95,12 +102,14 @@ private static class Position2Accessor implements Accessor<StructLike> {
private final int p1;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

Position2Accessor(int pos, PositionAccessor wrapped) {
Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.position();
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -117,6 +126,11 @@ public Class<?> javaClass() {
return javaClass;
}

@Override
public boolean hasOptionalFieldInPath() {
Comment thread
pvary marked this conversation as resolved.
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")";
Expand All @@ -129,13 +143,15 @@ private static class Position3Accessor implements Accessor<StructLike> {
private final int p2;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

Position3Accessor(int pos, Position2Accessor wrapped) {
Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.p0;
this.p2 = wrapped.p1;
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -148,6 +164,11 @@ public Type type() {
return type;
}

@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type=" + type + ")";
Expand All @@ -157,10 +178,12 @@ public String toString() {
private static class WrappedPositionAccessor implements Accessor<StructLike> {
private final int position;
private final Accessor<StructLike> accessor;
private final boolean hasOptionalFieldInPath;

WrappedPositionAccessor(int pos, Accessor<StructLike> accessor) {
WrappedPositionAccessor(int pos, Accessor<StructLike> accessor, boolean isOptional) {
this.position = pos;
this.accessor = accessor;
this.hasOptionalFieldInPath = isOptional || accessor.hasOptionalFieldInPath();
}

@Override
Expand All @@ -177,27 +200,32 @@ public Type type() {
return accessor.type();
}

@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "WrappedAccessor(position=" + position + ", wrapped=" + accessor + ")";
}
}

private static Accessor<StructLike> newAccessor(int pos, Type type) {
return new PositionAccessor(pos, type);
private static Accessor<StructLike> newAccessor(int pos, boolean isOptional, Type type) {
return new PositionAccessor(pos, type, isOptional);
}

private static Accessor<StructLike> newAccessor(
int pos, boolean isOptional, Accessor<StructLike> accessor) {
if (isOptional) {
// the wrapped position handles null layers
return new WrappedPositionAccessor(pos, accessor);
return new WrappedPositionAccessor(pos, accessor, isOptional);
} else if (accessor.getClass() == PositionAccessor.class) {
return new Position2Accessor(pos, (PositionAccessor) accessor);
return new Position2Accessor(pos, (PositionAccessor) accessor, isOptional);
} else if (accessor instanceof Position2Accessor) {
return new Position3Accessor(pos, (Position2Accessor) accessor);
return new Position3Accessor(pos, (Position2Accessor) accessor, isOptional);
} else {
return new WrappedPositionAccessor(pos, accessor);
return new WrappedPositionAccessor(pos, accessor, isOptional);
}
}

Expand Down Expand Up @@ -226,7 +254,7 @@ public Map<Integer, Accessor<StructLike>> struct(
}

// Add an accessor for this field as an Object (may or may not be primitive).
accessors.put(field.fieldId(), newAccessor(i, field.type()));
accessors.put(field.fieldId(), newAccessor(i, field.isOptional(), field.type()));
}

return accessors;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public Type type() {

@Override
public boolean producesNull() {
return field.isOptional();
// A leaf required field can evaluate to null if it is optional itself or any
// ancestor on the path is optional.
return accessor.hasOptionalFieldInPath();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.expressions;

import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;

import org.apache.iceberg.Accessor;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.Test;

public class TestBoundReference {
Comment thread
dejangvozdenac marked this conversation as resolved.
@Test
public void testProducesNullNonNestedField() {
// schema: x (required), y (optional)
Schema schema =
new Schema(
required(1, "x", Types.IntegerType.get()), optional(2, "y", Types.IntegerType.get()));

Types.NestedField requiredField = schema.findField(1);
Accessor<StructLike> requiredAccessor = schema.accessorForField(1);

BoundReference<Integer> requiredRef =
new BoundReference<>(requiredField, requiredAccessor, "x");
assertThat(requiredRef.producesNull()).isFalse();

Types.NestedField optionalField = schema.findField(2);
Accessor<StructLike> optionalAccessor = schema.accessorForField(2);

BoundReference<Integer> optionalRef =
new BoundReference<>(optionalField, optionalAccessor, "y");
assertThat(optionalRef.producesNull()).isTrue();
}

@Test
public void testProducesNullRequiredLeafNoOptional_ancestors() {
// schema: s1 (required) -> s2 (required)
Schema schema =
new Schema(
required(1, "s1", Types.StructType.of(required(2, "s2", Types.IntegerType.get()))));

Types.NestedField requiredLeafField = schema.findField(2);
Accessor<StructLike> accessor = schema.accessorForField(2);

BoundReference<Integer> ref = new BoundReference<>(requiredLeafField, accessor, "s2");
assertThat(ref.producesNull()).isFalse();
}

@Test
public void testProducesNullOptionalLeaf() {
// schema: s1 (required) -> s2 (optional)
Schema schema =
new Schema(
required(1, "s1", Types.StructType.of(optional(2, "s2", Types.IntegerType.get()))));

Types.NestedField optionalLeafField = schema.findField(2);
Accessor<StructLike> accessor = schema.accessorForField(2);

BoundReference<Integer> ref = new BoundReference<>(optionalLeafField, accessor, "s2");
assertThat(ref.producesNull()).isTrue();
}

@Test
public void testProducesNullRequiredLeafWithOptionalTopAncestor() {
// schema: s1 (optional) -> s2 (required)
Schema schema =
new Schema(
optional(1, "s1", Types.StructType.of(required(2, "s2", Types.IntegerType.get()))));

Types.NestedField requiredLeafField = schema.findField(2);
Accessor<StructLike> accessor = schema.accessorForField(2);

BoundReference<Integer> ref = new BoundReference<>(requiredLeafField, accessor, "s2");
assertThat(ref.producesNull()).isTrue();
}

@Test
public void testProducesNullRequiredLeafWithOptionalIntermediateAncestor() {
// schema: s1 (required) -> s2 (optional) -> s3 (required) -> s4 (required)
Schema schema =
new Schema(
required(
1,
"s1",
Types.StructType.of(
optional(
2,
"s2",
Types.StructType.of(
required(
3,
"s3",
Types.StructType.of(
required(4, "s4", Types.IntegerType.get()))))))));

Types.NestedField requiredLeafField = schema.findField(4);
Accessor<StructLike> accessor = schema.accessorForField(4);

BoundReference<Integer> ref = new BoundReference<>(requiredLeafField, accessor, "s4");
assertThat(ref.producesNull()).isTrue();
}

@Test
public void testProducesNullRequiredNestedField4() {
// schema: s1 (required) -> s2 (required) -> s3 (required) -> s4 (required) -> s5 (required)
Schema schema =
new Schema(
required(
1,
"s1",
Types.StructType.of(
required(
2,
"s2",
Types.StructType.of(
required(
3,
"s3",
Types.StructType.of(
required(
4,
"s4",
Types.StructType.of(
required(5, "s5", Types.IntegerType.get()))))))))));

Types.NestedField requiredLeafField = schema.findField(5);
Accessor<StructLike> accessor = schema.accessorForField(5);

BoundReference<Integer> ref = new BoundReference<>(requiredLeafField, accessor, "s5");
assertThat(ref.producesNull()).isFalse();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,7 @@ public void testNotNull() {
shouldRead =
new ParquetBloomRowGroupFilter(SCHEMA, notNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should read: this field is required and are always not-null")
.isTrue();
assertThat(shouldRead).as("Should read: bloom filter doesn't help").isTrue();
}

@Test
Expand All @@ -323,8 +321,8 @@ public void testIsNull() {
new ParquetBloomRowGroupFilter(SCHEMA, isNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should skip: this field is required and are always not-null")
.isFalse();
.as("Should read: required nested field can still be null if any ancestor is optional")
.isTrue();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,28 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() {
ImmutableList.of(row(4, Double.NEGATIVE_INFINITY)));
}

@TestTemplate
public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() {
sql(
"CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>)"
+ "USING iceberg ",
tableName);
configurePlanningMode(planningMode);

sql("INSERT INTO %s VALUES (0, NULL)", tableName);
sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName);

checkOnlyIcebergFilters(
"address is null" /* query predicate */,
"address IS NULL" /* Iceberg scan filters */,
ImmutableList.of(row(0, null)));

checkOnlyIcebergFilters(
"address is not null" /* query predicate */,
"address IS NOT NULL" /* Iceberg scan filters */,
ImmutableList.of(row(1, row("123 Main St"))));
}

private void checkOnlyIcebergFilters(
String predicate, String icebergFilters, List<Object[]> expectedRows) {

Expand Down
Loading