From f0565951465c39cb76923c0db9a84525dcc9bf6f Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Wed, 13 Aug 2025 10:55:15 -0400 Subject: [PATCH 01/12] add ancestor check to field optional is null check --- .../java/org/apache/iceberg/Accessor.java | 10 +++ .../java/org/apache/iceberg/Accessors.java | 70 ++++++++++++++++--- .../iceberg/expressions/BoundReference.java | 5 +- 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/Accessor.java b/api/src/main/java/org/apache/iceberg/Accessor.java index 2a20a04df91a..0b884d060b23 100644 --- a/api/src/main/java/org/apache/iceberg/Accessor.java +++ b/api/src/main/java/org/apache/iceberg/Accessor.java @@ -25,4 +25,14 @@ public interface Accessor extends Serializable { Object get(T container); Type type(); + + /** + * Returns true if any ancestor in the access path to this field is optional. + */ + boolean hasOptionalAncestor(); + + /** + * Returns true if this accessor is optional. + */ + boolean isOptional(); } diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java b/api/src/main/java/org/apache/iceberg/Accessors.java index 0b36730fbb4b..7ca8b61de452 100644 --- a/api/src/main/java/org/apache/iceberg/Accessors.java +++ b/api/src/main/java/org/apache/iceberg/Accessors.java @@ -59,11 +59,13 @@ private static class PositionAccessor implements Accessor { private final int position; private final Type type; private final Class javaClass; + private final boolean isOptional; - PositionAccessor(int pos, Type type) { + PositionAccessor(int pos, boolean isOptional, Type type) { this.position = pos; this.type = type; this.javaClass = type.typeId().javaClass(); + this.isOptional = isOptional; } @Override @@ -84,6 +86,16 @@ public Class javaClass() { return javaClass; } + @Override + public boolean isOptional() { + return isOptional; + } + + @Override + public boolean hasOptionalAncestor() { + return false; + } + @Override public String toString() { return "Accessor(positions=[" + position + "], type=" + type + ")"; @@ -95,12 +107,14 @@ private static class Position2Accessor implements Accessor { private final int p1; private final Type type; private final Class javaClass; + private final boolean isOptional; - Position2Accessor(int pos, PositionAccessor wrapped) { + Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) { this.p0 = pos; this.p1 = wrapped.position(); this.type = wrapped.type(); this.javaClass = wrapped.javaClass(); + this.isOptional = isOptional; } @Override @@ -117,6 +131,16 @@ public Class javaClass() { return javaClass; } + @Override + public boolean isOptional() { + return isOptional; + } + + @Override + public boolean hasOptionalAncestor() { + return false; + } + @Override public String toString() { return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")"; @@ -129,13 +153,15 @@ private static class Position3Accessor implements Accessor { private final int p2; private final Type type; private final Class javaClass; + private final boolean isOptional; - Position3Accessor(int pos, Position2Accessor wrapped) { + Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) { this.p0 = pos; this.p1 = wrapped.p0; this.p2 = wrapped.p1; this.type = wrapped.type(); this.javaClass = wrapped.javaClass(); + this.isOptional = isOptional; } @Override @@ -148,6 +174,16 @@ public Type type() { return type; } + @Override + public boolean isOptional() { + return isOptional; + } + + @Override + public boolean hasOptionalAncestor() { + return false; + } + @Override public String toString() { return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type=" + type + ")"; @@ -157,10 +193,12 @@ public String toString() { private static class WrappedPositionAccessor implements Accessor { private final int position; private final Accessor accessor; + private final boolean isOptional; - WrappedPositionAccessor(int pos, Accessor accessor) { + WrappedPositionAccessor(int pos, Accessor accessor, boolean isOptional) { this.position = pos; this.accessor = accessor; + this.isOptional = isOptional; } @Override @@ -177,27 +215,37 @@ public Type type() { return accessor.type(); } + @Override + public boolean isOptional() { + return isOptional; + } + + @Override + public boolean hasOptionalAncestor() { + return accessor.isOptional() || accessor.hasOptionalAncestor(); + } + @Override public String toString() { return "WrappedAccessor(position=" + position + ", wrapped=" + accessor + ")"; } } - private static Accessor newAccessor(int pos, Type type) { - return new PositionAccessor(pos, type); + private static Accessor newAccessor(int pos, boolean isOptional, Type type) { + return new PositionAccessor(pos, isOptional, type); } private static Accessor newAccessor( int pos, boolean isOptional, Accessor accessor) { if (isOptional) { // the wrapped position handles null layers - return new WrappedPositionAccessor(pos, accessor); + return new WrappedPositionAccessor(pos, accessor, isOptional); } else if (accessor.getClass() == PositionAccessor.class) { - return new Position2Accessor(pos, (PositionAccessor) accessor); + return new Position2Accessor(pos, (PositionAccessor) accessor, isOptional); } else if (accessor instanceof Position2Accessor) { - return new Position3Accessor(pos, (Position2Accessor) accessor); + return new Position3Accessor(pos, (Position2Accessor) accessor, isOptional); } else { - return new WrappedPositionAccessor(pos, accessor); + return new WrappedPositionAccessor(pos, accessor, isOptional); } } @@ -226,7 +274,7 @@ public Map> struct( } // Add an accessor for this field as an Object (may or may not be primitive). - accessors.put(field.fieldId(), newAccessor(i, field.type())); + accessors.put(field.fieldId(), newAccessor(i, field.isOptional(), field.type())); } return accessors; diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java index 0295fe518fc3..7cfa9b45291d 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java @@ -57,7 +57,10 @@ public Type type() { @Override public boolean producesNull() { - return field.isOptional(); + // A leaf required field can still evaluate to null if any ancestor struct on the path + // is optional and is null at runtime. Accessor implementations indicate this via + // hasOptionalAncestor(). + return field.isOptional() || accessor.hasOptionalAncestor(); } @Override From a05ba32d6f2edcdea31193a8b63d383e508cc758 Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Thu, 14 Aug 2025 11:15:56 -0400 Subject: [PATCH 02/12] fix tests and simplify --- .palantir/revapi.yml | 4 ++ .../java/org/apache/iceberg/Accessor.java | 11 +--- .../java/org/apache/iceberg/Accessors.java | 52 ++++++------------- .../iceberg/expressions/BoundReference.java | 7 ++- .../parquet/TestBloomRowGroupFilter.java | 8 +-- 5 files changed, 27 insertions(+), 55 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 1c871ba30c04..4a91905499b2 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1254,6 +1254,10 @@ acceptedBreaks: new: "method void org.apache.iceberg.data.parquet.BaseParquetWriter::()" justification: "Changing deprecated code" "1.9.0": + org.apache.iceberg:iceberg-api: + - code: "java.method.addedToInterface" + new: "method boolean org.apache.iceberg.Accessor::hasOptionalFieldInPath()" + justification: "All subclasses implement name" org.apache.iceberg:iceberg-common: - code: "java.method.visibilityReduced" old: "method R org.apache.iceberg.common.DynConstructors.Ctor::invokeChecked(java.lang.Object,\ diff --git a/api/src/main/java/org/apache/iceberg/Accessor.java b/api/src/main/java/org/apache/iceberg/Accessor.java index 0b884d060b23..9e030c708132 100644 --- a/api/src/main/java/org/apache/iceberg/Accessor.java +++ b/api/src/main/java/org/apache/iceberg/Accessor.java @@ -26,13 +26,6 @@ public interface Accessor extends Serializable { Type type(); - /** - * Returns true if any ancestor in the access path to this field is optional. - */ - boolean hasOptionalAncestor(); - - /** - * Returns true if this accessor is optional. - */ - boolean isOptional(); + /** Returns true if the current field or any ancestor in the access path is optional. */ + boolean hasOptionalFieldInPath(); } diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java b/api/src/main/java/org/apache/iceberg/Accessors.java index 7ca8b61de452..63578e39d90d 100644 --- a/api/src/main/java/org/apache/iceberg/Accessors.java +++ b/api/src/main/java/org/apache/iceberg/Accessors.java @@ -59,13 +59,13 @@ private static class PositionAccessor implements Accessor { private final int position; private final Type type; private final Class javaClass; - private final boolean isOptional; + private final boolean hasOptionalFieldInPath; PositionAccessor(int pos, boolean isOptional, Type type) { this.position = pos; this.type = type; this.javaClass = type.typeId().javaClass(); - this.isOptional = isOptional; + this.hasOptionalFieldInPath = isOptional; } @Override @@ -87,13 +87,8 @@ public Class javaClass() { } @Override - public boolean isOptional() { - return isOptional; - } - - @Override - public boolean hasOptionalAncestor() { - return false; + public boolean hasOptionalFieldInPath() { + return hasOptionalFieldInPath; } @Override @@ -107,14 +102,14 @@ private static class Position2Accessor implements Accessor { private final int p1; private final Type type; private final Class javaClass; - private final boolean isOptional; + private final boolean hasOptionalFieldInPath; Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) { this.p0 = pos; this.p1 = wrapped.position(); this.type = wrapped.type(); this.javaClass = wrapped.javaClass(); - this.isOptional = isOptional; + this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath(); } @Override @@ -132,13 +127,8 @@ public Class javaClass() { } @Override - public boolean isOptional() { - return isOptional; - } - - @Override - public boolean hasOptionalAncestor() { - return false; + public boolean hasOptionalFieldInPath() { + return hasOptionalFieldInPath; } @Override @@ -153,7 +143,7 @@ private static class Position3Accessor implements Accessor { private final int p2; private final Type type; private final Class javaClass; - private final boolean isOptional; + private final boolean hasOptionalFieldInPath; Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) { this.p0 = pos; @@ -161,7 +151,7 @@ private static class Position3Accessor implements Accessor { this.p2 = wrapped.p1; this.type = wrapped.type(); this.javaClass = wrapped.javaClass(); - this.isOptional = isOptional; + this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath(); } @Override @@ -175,13 +165,8 @@ public Type type() { } @Override - public boolean isOptional() { - return isOptional; - } - - @Override - public boolean hasOptionalAncestor() { - return false; + public boolean hasOptionalFieldInPath() { + return hasOptionalFieldInPath; } @Override @@ -193,12 +178,12 @@ public String toString() { private static class WrappedPositionAccessor implements Accessor { private final int position; private final Accessor accessor; - private final boolean isOptional; + private final boolean hasOptionalFieldInPath; WrappedPositionAccessor(int pos, Accessor accessor, boolean isOptional) { this.position = pos; this.accessor = accessor; - this.isOptional = isOptional; + this.hasOptionalFieldInPath = isOptional || accessor.hasOptionalFieldInPath(); } @Override @@ -216,13 +201,8 @@ public Type type() { } @Override - public boolean isOptional() { - return isOptional; - } - - @Override - public boolean hasOptionalAncestor() { - return accessor.isOptional() || accessor.hasOptionalAncestor(); + public boolean hasOptionalFieldInPath() { + return hasOptionalFieldInPath; } @Override diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java index 7cfa9b45291d..0ffd5b465231 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java @@ -57,10 +57,9 @@ public Type type() { @Override public boolean producesNull() { - // A leaf required field can still evaluate to null if any ancestor struct on the path - // is optional and is null at runtime. Accessor implementations indicate this via - // hasOptionalAncestor(). - return field.isOptional() || accessor.hasOptionalAncestor(); + // A leaf required field can evaluate to null if it is optional itself or any + // ancestor on the path is optional. + return field.isOptional() || accessor.hasOptionalFieldInPath(); } @Override diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java index f54ddfcc5a27..35be4863d10a 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java @@ -297,9 +297,7 @@ public void testNotNull() { shouldRead = new ParquetBloomRowGroupFilter(SCHEMA, notNull("struct_not_null.int_field")) .shouldRead(parquetSchema, rowGroupMetadata, bloomStore); - assertThat(shouldRead) - .as("Should read: this field is required and are always not-null") - .isTrue(); + assertThat(shouldRead).as("Should read: bloom filter doesn't help").isTrue(); } @Test @@ -322,9 +320,7 @@ public void testIsNull() { shouldRead = new ParquetBloomRowGroupFilter(SCHEMA, isNull("struct_not_null.int_field")) .shouldRead(parquetSchema, rowGroupMetadata, bloomStore); - assertThat(shouldRead) - .as("Should skip: this field is required and are always not-null") - .isFalse(); + assertThat(shouldRead).as("Should read: bloom filter doesn't help").isTrue(); } @Test From 115704b02c0b6b642289a86f3a649f5431e6d5d2 Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Thu, 14 Aug 2025 15:46:00 -0400 Subject: [PATCH 03/12] add test --- .../TestBoundReferenceProducesNull.java | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 api/src/test/java/org/apache/iceberg/expressions/TestBoundReferenceProducesNull.java diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReferenceProducesNull.java b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReferenceProducesNull.java new file mode 100644 index 000000000000..722f073b976c --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReferenceProducesNull.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.expressions; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.iceberg.Accessor; +import org.apache.iceberg.Schema; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestBoundReferenceProducesNull { + @Test + public void testNonNestedField() { + // schema: required, optional + Schema schema = + new Schema( + required(1, "required", Types.IntegerType.get()), + optional(2, "optional", Types.IntegerType.get())); + + Types.NestedField requiredField = schema.findField(1); + Accessor requiredAccessor = schema.accessorForField(1); + + BoundReference requiredRef = + new BoundReference<>(requiredField, requiredAccessor, "required"); + assertThat(requiredRef.producesNull()).isFalse(); + + Types.NestedField optionalField = schema.findField(2); + Accessor optionalAccessor = schema.accessorForField(2); + + BoundReference optionalRef = + new BoundReference<>(optionalField, optionalAccessor, "optional"); + assertThat(optionalRef.producesNull()).isTrue(); + } + + @Test + public void testRequiredLeafNoOptionalAncestors() { + // schema: requiredAncestor -> requiredLeaf + Schema schema = + new Schema( + required( + 1, + "requiredAncestor", + Types.StructType.of(required(2, "requiredLeaf", Types.IntegerType.get())))); + + Types.NestedField requiredLeafField = schema.findField(2); + Accessor accessor = schema.accessorForField(2); + + BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "requiredLeaf"); + assertThat(ref.producesNull()).isFalse(); + } + + @Test + public void testOptionalLeaf() { + // schema: requiredAncestor -> optionalLeaf + Schema schema = + new Schema( + required( + 1, + "requiredAncestor", + Types.StructType.of(optional(2, "optionalLeaf", Types.IntegerType.get())))); + + Types.NestedField optionalLeafField = schema.findField(2); + Accessor accessor = schema.accessorForField(2); + + BoundReference ref = new BoundReference<>(optionalLeafField, accessor, "optionalLeaf"); + assertThat(ref.producesNull()).isTrue(); + } + + @Test + public void testRequiredLeafWithOptionalTopAncestor() { + // schema: optionalAncestor -> requiredLeaf + Schema schema = + new Schema( + optional( + 1, + "optionalAncestor", + Types.StructType.of(required(2, "requiredLeaf", Types.IntegerType.get())))); + + Types.NestedField requiredLeafField = schema.findField(2); + Accessor accessor = schema.accessorForField(2); + + BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "requiredLeaf"); + assertThat(ref.producesNull()).isTrue(); + } + + @Test + public void testRequiredLeafWithOptionalIntermediateAncestor() { + // schema: requiredRoot -> optionalIntermediate -> requiredIntermediate -> requiredLeaf + Schema schema = + new Schema( + required( + 1, + "requiredRoot", + Types.StructType.of( + optional( + 2, + "optionalIntermediate", + Types.StructType.of( + required( + 3, + "requiredIntermediate", + Types.StructType.of( + required(4, "requiredLeaf", Types.IntegerType.get())))))))); + + Types.NestedField requiredLeafField = schema.findField(4); + Accessor accessor = schema.accessorForField(4); + + BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "requiredLeaf"); + assertThat(ref.producesNull()).isTrue(); + } +} From e77a8161fd7de19d3b34a77e687fee5ff1bc01ac Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Thu, 14 Aug 2025 23:42:21 -0400 Subject: [PATCH 04/12] add spark tests and address review feedback --- .palantir/revapi.yml | 2 +- .../java/org/apache/iceberg/Accessors.java | 4 +- .../iceberg/expressions/BoundReference.java | 2 +- ...ducesNull.java => TestBoundReference.java} | 90 +++++++++++-------- .../parquet/TestBloomRowGroupFilter.java | 4 +- .../iceberg/spark/sql/TestFilterPushDown.java | 21 +++++ .../iceberg/spark/sql/TestFilterPushDown.java | 21 +++++ .../iceberg/spark/sql/TestFilterPushDown.java | 21 +++++ 8 files changed, 125 insertions(+), 40 deletions(-) rename api/src/test/java/org/apache/iceberg/expressions/{TestBoundReferenceProducesNull.java => TestBoundReference.java} (56%) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 4a91905499b2..1caf77d73af5 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1257,7 +1257,7 @@ acceptedBreaks: org.apache.iceberg:iceberg-api: - code: "java.method.addedToInterface" new: "method boolean org.apache.iceberg.Accessor::hasOptionalFieldInPath()" - justification: "All subclasses implement name" + justification: "All subclasses implement the method" org.apache.iceberg:iceberg-common: - code: "java.method.visibilityReduced" old: "method R org.apache.iceberg.common.DynConstructors.Ctor::invokeChecked(java.lang.Object,\ diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java b/api/src/main/java/org/apache/iceberg/Accessors.java index 63578e39d90d..06ee0a916cf7 100644 --- a/api/src/main/java/org/apache/iceberg/Accessors.java +++ b/api/src/main/java/org/apache/iceberg/Accessors.java @@ -61,7 +61,7 @@ private static class PositionAccessor implements Accessor { private final Class javaClass; private final boolean hasOptionalFieldInPath; - PositionAccessor(int pos, boolean isOptional, Type type) { + PositionAccessor(int pos, Type type, boolean isOptional) { this.position = pos; this.type = type; this.javaClass = type.typeId().javaClass(); @@ -212,7 +212,7 @@ public String toString() { } private static Accessor newAccessor(int pos, boolean isOptional, Type type) { - return new PositionAccessor(pos, isOptional, type); + return new PositionAccessor(pos, type, isOptional); } private static Accessor newAccessor( diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java index 0ffd5b465231..decda85f2e63 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java @@ -59,7 +59,7 @@ public Type type() { public boolean producesNull() { // A leaf required field can evaluate to null if it is optional itself or any // ancestor on the path is optional. - return field.isOptional() || accessor.hasOptionalFieldInPath(); + return accessor.hasOptionalFieldInPath(); } @Override diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReferenceProducesNull.java b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java similarity index 56% rename from api/src/test/java/org/apache/iceberg/expressions/TestBoundReferenceProducesNull.java rename to api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java index 722f073b976c..72c0c53e1dca 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReferenceProducesNull.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java @@ -28,104 +28,124 @@ import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; -public class TestBoundReferenceProducesNull { +public class TestBoundReference { @Test - public void testNonNestedField() { - // schema: required, optional + public void testProducesNullNonNestedField() { + // schema: x (required), y (optional) Schema schema = new Schema( - required(1, "required", Types.IntegerType.get()), - optional(2, "optional", Types.IntegerType.get())); + required(1, "x", Types.IntegerType.get()), optional(2, "y", Types.IntegerType.get())); Types.NestedField requiredField = schema.findField(1); Accessor requiredAccessor = schema.accessorForField(1); BoundReference requiredRef = - new BoundReference<>(requiredField, requiredAccessor, "required"); + new BoundReference<>(requiredField, requiredAccessor, "x"); assertThat(requiredRef.producesNull()).isFalse(); Types.NestedField optionalField = schema.findField(2); Accessor optionalAccessor = schema.accessorForField(2); BoundReference optionalRef = - new BoundReference<>(optionalField, optionalAccessor, "optional"); + new BoundReference<>(optionalField, optionalAccessor, "y"); assertThat(optionalRef.producesNull()).isTrue(); } @Test - public void testRequiredLeafNoOptionalAncestors() { - // schema: requiredAncestor -> requiredLeaf + public void testProducesNullRequiredLeafNoOptional_ancestors() { + // schema: s1 (required) -> s2 (required) Schema schema = new Schema( - required( - 1, - "requiredAncestor", - Types.StructType.of(required(2, "requiredLeaf", Types.IntegerType.get())))); + required(1, "s1", Types.StructType.of(required(2, "s2", Types.IntegerType.get())))); Types.NestedField requiredLeafField = schema.findField(2); Accessor accessor = schema.accessorForField(2); - BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "requiredLeaf"); + BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s2"); assertThat(ref.producesNull()).isFalse(); } @Test - public void testOptionalLeaf() { - // schema: requiredAncestor -> optionalLeaf + public void testProducesNullOptionalLeaf() { + // schema: s1 (required) -> s2 (optional) Schema schema = new Schema( - required( - 1, - "requiredAncestor", - Types.StructType.of(optional(2, "optionalLeaf", Types.IntegerType.get())))); + required(1, "s1", Types.StructType.of(optional(2, "s2", Types.IntegerType.get())))); Types.NestedField optionalLeafField = schema.findField(2); Accessor accessor = schema.accessorForField(2); - BoundReference ref = new BoundReference<>(optionalLeafField, accessor, "optionalLeaf"); + BoundReference ref = new BoundReference<>(optionalLeafField, accessor, "s2"); assertThat(ref.producesNull()).isTrue(); } @Test - public void testRequiredLeafWithOptionalTopAncestor() { - // schema: optionalAncestor -> requiredLeaf + public void testProducesNullRequiredLeafWithOptionalTopAncestor() { + // schema: s1 (optional) -> s2 (required) Schema schema = new Schema( - optional( - 1, - "optionalAncestor", - Types.StructType.of(required(2, "requiredLeaf", Types.IntegerType.get())))); + optional(1, "s1", Types.StructType.of(required(2, "s2", Types.IntegerType.get())))); Types.NestedField requiredLeafField = schema.findField(2); Accessor accessor = schema.accessorForField(2); - BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "requiredLeaf"); + BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s2"); assertThat(ref.producesNull()).isTrue(); } @Test - public void testRequiredLeafWithOptionalIntermediateAncestor() { - // schema: requiredRoot -> optionalIntermediate -> requiredIntermediate -> requiredLeaf + public void testProducesNullRequiredLeafWithOptionalIntermediateAncestor() { + // schema: s1 (required) -> s2 (optional) -> s3 (required) -> s4 (required) Schema schema = new Schema( required( 1, - "requiredRoot", + "s1", Types.StructType.of( optional( 2, - "optionalIntermediate", + "s2", Types.StructType.of( required( 3, - "requiredIntermediate", + "s3", Types.StructType.of( - required(4, "requiredLeaf", Types.IntegerType.get())))))))); + required(4, "s4", Types.IntegerType.get())))))))); Types.NestedField requiredLeafField = schema.findField(4); Accessor accessor = schema.accessorForField(4); - BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "requiredLeaf"); + BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s4"); assertThat(ref.producesNull()).isTrue(); } + + @Test + public void testProducesNullRequiredNestedField4() { + // schema: s1 (required) -> s2 (required) -> s3 (required) -> s4 (required) -> s5 (required) + Schema schema = + new Schema( + required( + 1, + "s1", + Types.StructType.of( + required( + 2, + "s2", + Types.StructType.of( + required( + 3, + "s3", + Types.StructType.of( + required( + 4, + "s4", + Types.StructType.of( + required(5, "s5", Types.IntegerType.get())))))))))); + + Types.NestedField requiredLeafField = schema.findField(5); + Accessor accessor = schema.accessorForField(5); + + BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s5"); + assertThat(ref.producesNull()).isFalse(); + } } diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java index 35be4863d10a..694c72876c43 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java @@ -320,7 +320,9 @@ public void testIsNull() { shouldRead = new ParquetBloomRowGroupFilter(SCHEMA, isNull("struct_not_null.int_field")) .shouldRead(parquetSchema, rowGroupMetadata, bloomStore); - assertThat(shouldRead).as("Should read: bloom filter doesn't help").isTrue(); + assertThat(shouldRead) + .as("Should read: required nested field can still be null if any ancestor is optional") + .isTrue(); } @Test diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 9d2ce2b388a2..2b4be20f48b9 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -578,6 +578,27 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { ImmutableList.of(row(4, Double.NEGATIVE_INFINITY))); } + @TestTemplate + public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { + sql( + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + "USING iceberg ", + tableName); + configurePlanningMode(planningMode); + + sql("INSERT INTO %s VALUES (0, NULL)", tableName); + sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); + + checkOnlyIcebergFilters( + "address is null" /* query predicate */, + "address IS NULL" /* Iceberg scan filters */, + ImmutableList.of(row(0, null))); + + checkOnlyIcebergFilters( + "address is not null" /* query predicate */, + "address IS NOT NULL" /* Iceberg scan filters */, + ImmutableList.of(row(1, row("123 Main St")))); + } + private void checkOnlyIcebergFilters( String predicate, String icebergFilters, List expectedRows) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 9d2ce2b388a2..2b4be20f48b9 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -578,6 +578,27 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { ImmutableList.of(row(4, Double.NEGATIVE_INFINITY))); } + @TestTemplate + public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { + sql( + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + "USING iceberg ", + tableName); + configurePlanningMode(planningMode); + + sql("INSERT INTO %s VALUES (0, NULL)", tableName); + sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); + + checkOnlyIcebergFilters( + "address is null" /* query predicate */, + "address IS NULL" /* Iceberg scan filters */, + ImmutableList.of(row(0, null))); + + checkOnlyIcebergFilters( + "address is not null" /* query predicate */, + "address IS NOT NULL" /* Iceberg scan filters */, + ImmutableList.of(row(1, row("123 Main St")))); + } + private void checkOnlyIcebergFilters( String predicate, String icebergFilters, List expectedRows) { diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 9d2ce2b388a2..2b4be20f48b9 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -578,6 +578,27 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { ImmutableList.of(row(4, Double.NEGATIVE_INFINITY))); } + @TestTemplate + public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { + sql( + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + "USING iceberg ", + tableName); + configurePlanningMode(planningMode); + + sql("INSERT INTO %s VALUES (0, NULL)", tableName); + sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); + + checkOnlyIcebergFilters( + "address is null" /* query predicate */, + "address IS NULL" /* Iceberg scan filters */, + ImmutableList.of(row(0, null))); + + checkOnlyIcebergFilters( + "address is not null" /* query predicate */, + "address IS NOT NULL" /* Iceberg scan filters */, + ImmutableList.of(row(1, row("123 Main St")))); + } + private void checkOnlyIcebergFilters( String predicate, String icebergFilters, List expectedRows) { From b6c0cb6d123980a8f322b7d8e008c3ad873529aa Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Thu, 14 Aug 2025 23:46:36 -0400 Subject: [PATCH 05/12] style --- .../java/org/apache/iceberg/spark/sql/TestFilterPushDown.java | 3 ++- .../java/org/apache/iceberg/spark/sql/TestFilterPushDown.java | 3 ++- .../java/org/apache/iceberg/spark/sql/TestFilterPushDown.java | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 2b4be20f48b9..f4d4bd0b82ba 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -581,7 +581,8 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { @TestTemplate public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + "USING iceberg ", + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + + "USING iceberg ", tableName); configurePlanningMode(planningMode); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 2b4be20f48b9..f4d4bd0b82ba 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -581,7 +581,8 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { @TestTemplate public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + "USING iceberg ", + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + + "USING iceberg ", tableName); configurePlanningMode(planningMode); diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 2b4be20f48b9..f4d4bd0b82ba 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -581,7 +581,8 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { @TestTemplate public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + "USING iceberg ", + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + + "USING iceberg ", tableName); configurePlanningMode(planningMode); From 707382fd4e3d05bac592cf4ebc4376caa775854c Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Fri, 15 Aug 2025 01:19:23 -0400 Subject: [PATCH 06/12] fix tests --- .../expressions/TestBoundReference.java | 182 +++++++----------- .../iceberg/spark/sql/TestFilterPushDown.java | 8 +- .../iceberg/spark/sql/TestFilterPushDown.java | 8 +- .../iceberg/spark/sql/TestFilterPushDown.java | 8 +- 4 files changed, 79 insertions(+), 127 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java index 72c0c53e1dca..664180e7937e 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java @@ -22,130 +22,82 @@ import static org.apache.iceberg.types.Types.NestedField.required; import static org.assertj.core.api.Assertions.assertThat; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; import org.apache.iceberg.Accessor; import org.apache.iceberg.Schema; import org.apache.iceberg.StructLike; import org.apache.iceberg.types.Types; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; public class TestBoundReference { - @Test - public void testProducesNullNonNestedField() { - // schema: x (required), y (optional) - Schema schema = - new Schema( - required(1, "x", Types.IntegerType.get()), optional(2, "y", Types.IntegerType.get())); - - Types.NestedField requiredField = schema.findField(1); - Accessor requiredAccessor = schema.accessorForField(1); - - BoundReference requiredRef = - new BoundReference<>(requiredField, requiredAccessor, "x"); - assertThat(requiredRef.producesNull()).isFalse(); - - Types.NestedField optionalField = schema.findField(2); - Accessor optionalAccessor = schema.accessorForField(2); - - BoundReference optionalRef = - new BoundReference<>(optionalField, optionalAccessor, "y"); - assertThat(optionalRef.producesNull()).isTrue(); - } - - @Test - public void testProducesNullRequiredLeafNoOptional_ancestors() { - // schema: s1 (required) -> s2 (required) - Schema schema = - new Schema( - required(1, "s1", Types.StructType.of(required(2, "s2", Types.IntegerType.get())))); - - Types.NestedField requiredLeafField = schema.findField(2); - Accessor accessor = schema.accessorForField(2); - - BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s2"); - assertThat(ref.producesNull()).isFalse(); - } - - @Test - public void testProducesNullOptionalLeaf() { - // schema: s1 (required) -> s2 (optional) - Schema schema = - new Schema( - required(1, "s1", Types.StructType.of(optional(2, "s2", Types.IntegerType.get())))); - - Types.NestedField optionalLeafField = schema.findField(2); - Accessor accessor = schema.accessorForField(2); - - BoundReference ref = new BoundReference<>(optionalLeafField, accessor, "s2"); - assertThat(ref.producesNull()).isTrue(); - } - - @Test - public void testProducesNullRequiredLeafWithOptionalTopAncestor() { - // schema: s1 (optional) -> s2 (required) - Schema schema = - new Schema( - optional(1, "s1", Types.StructType.of(required(2, "s2", Types.IntegerType.get())))); - - Types.NestedField requiredLeafField = schema.findField(2); - Accessor accessor = schema.accessorForField(2); - - BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s2"); - assertThat(ref.producesNull()).isTrue(); + // Build a schema with a single nested struct with optionalList.size() levels with the following + // structure: + // s1: struct(s2: struct(s3: struct(..., sn: struct(leaf: int)))) + // where each s{i} is an optional struct if optionalList.get(i) is true and a required struct if + // false + private static Schema buildSchemaFromoptionalList(List optionalList, String leafName) { + if (optionalList == null || optionalList.isEmpty()) { + throw new IllegalArgumentException("optionalList must not be empty"); + } + + Types.NestedField leaf = + optionalList.get(optionalList.size() - 1) + ? optional(optionalList.size(), leafName, Types.IntegerType.get()) + : required(optionalList.size(), leafName, Types.IntegerType.get()); + + Types.StructType current = Types.StructType.of(leaf); + + for (int i = optionalList.size() - 2; i >= 0; i--) { + int id = i + 1; + String name = "s" + (i + 1); + current = + Types.StructType.of( + optionalList.get(i) ? optional(id, name, current) : required(id, name, current)); + } + + return new Schema(current.fields()); } - @Test - public void testProducesNullRequiredLeafWithOptionalIntermediateAncestor() { - // schema: s1 (required) -> s2 (optional) -> s3 (required) -> s4 (required) - Schema schema = - new Schema( - required( - 1, - "s1", - Types.StructType.of( - optional( - 2, - "s2", - Types.StructType.of( - required( - 3, - "s3", - Types.StructType.of( - required(4, "s4", Types.IntegerType.get())))))))); - - Types.NestedField requiredLeafField = schema.findField(4); - Accessor accessor = schema.accessorForField(4); - - BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s4"); - assertThat(ref.producesNull()).isTrue(); + private static Stream producesNullCases() { + return Stream.of( + // basic fields, no struct levels + Arguments.of(Arrays.asList(false), false), + Arguments.of(Arrays.asList(true), true), + // one level + Arguments.of(Arrays.asList(false, false), false), + Arguments.of(Arrays.asList(false, true), true), + Arguments.of(Arrays.asList(true, false), true), + // two levels + Arguments.of(Arrays.asList(false, false, false), false), + Arguments.of(Arrays.asList(false, false, true), true), + Arguments.of(Arrays.asList(true, false, false), true), + Arguments.of(Arrays.asList(false, true, false), true), + // three levels + Arguments.of(Arrays.asList(false, false, false, false), false), + Arguments.of(Arrays.asList(false, false, false, true), true), + Arguments.of(Arrays.asList(true, false, false, false), true), + Arguments.of(Arrays.asList(false, true, false, false), true), + // four levels + Arguments.of(Arrays.asList(false, false, false, false, false), false), + Arguments.of(Arrays.asList(false, false, false, false, true), true), + Arguments.of(Arrays.asList(true, false, false, false, false), true), + Arguments.of(Arrays.asList(false, true, true, true, false), true)); } - @Test - public void testProducesNullRequiredNestedField4() { - // schema: s1 (required) -> s2 (required) -> s3 (required) -> s4 (required) -> s5 (required) - Schema schema = - new Schema( - required( - 1, - "s1", - Types.StructType.of( - required( - 2, - "s2", - Types.StructType.of( - required( - 3, - "s3", - Types.StructType.of( - required( - 4, - "s4", - Types.StructType.of( - required(5, "s5", Types.IntegerType.get())))))))))); - - Types.NestedField requiredLeafField = schema.findField(5); - Accessor accessor = schema.accessorForField(5); - - BoundReference ref = new BoundReference<>(requiredLeafField, accessor, "s5"); - assertThat(ref.producesNull()).isFalse(); + @ParameterizedTest + @MethodSource("producesNullCases") + public void testProducesNull(List optionalList, boolean expectedProducesNull) { + String leafName = "leaf"; + Schema schema = buildSchemaFromoptionalList(optionalList, leafName); + int leafId = optionalList.size(); + Types.NestedField leafField = schema.findField(leafId); + Accessor accessor = schema.accessorForField(leafId); + + BoundReference ref = new BoundReference<>(leafField, accessor, leafName); + assertThat(ref.producesNull()).isEqualTo(expectedProducesNull); } } diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index f4d4bd0b82ba..88b4ac648db7 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -590,13 +590,13 @@ public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); checkOnlyIcebergFilters( - "address is null" /* query predicate */, - "address IS NULL" /* Iceberg scan filters */, + "address.street is null" /* query predicate */, + "address.street IS NULL" /* Iceberg scan filters */, ImmutableList.of(row(0, null))); checkOnlyIcebergFilters( - "address is not null" /* query predicate */, - "address IS NOT NULL" /* Iceberg scan filters */, + "address.street is not null" /* query predicate */, + "address.street IS NOT NULL" /* Iceberg scan filters */, ImmutableList.of(row(1, row("123 Main St")))); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index f4d4bd0b82ba..88b4ac648db7 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -590,13 +590,13 @@ public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); checkOnlyIcebergFilters( - "address is null" /* query predicate */, - "address IS NULL" /* Iceberg scan filters */, + "address.street is null" /* query predicate */, + "address.street IS NULL" /* Iceberg scan filters */, ImmutableList.of(row(0, null))); checkOnlyIcebergFilters( - "address is not null" /* query predicate */, - "address IS NOT NULL" /* Iceberg scan filters */, + "address.street is not null" /* query predicate */, + "address.street IS NOT NULL" /* Iceberg scan filters */, ImmutableList.of(row(1, row("123 Main St")))); } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index f4d4bd0b82ba..88b4ac648db7 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -590,13 +590,13 @@ public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); checkOnlyIcebergFilters( - "address is null" /* query predicate */, - "address IS NULL" /* Iceberg scan filters */, + "address.street is null" /* query predicate */, + "address.street IS NULL" /* Iceberg scan filters */, ImmutableList.of(row(0, null))); checkOnlyIcebergFilters( - "address is not null" /* query predicate */, - "address IS NOT NULL" /* Iceberg scan filters */, + "address.street is not null" /* query predicate */, + "address.street IS NOT NULL" /* Iceberg scan filters */, ImmutableList.of(row(1, row("123 Main St")))); } From 7223d0afb034e2239f78227bebabee3442a93408 Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Fri, 15 Aug 2025 01:21:23 -0400 Subject: [PATCH 07/12] fix tests --- .../java/org/apache/iceberg/expressions/TestBoundReference.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java index 664180e7937e..96282e3b6a0f 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java @@ -39,7 +39,7 @@ public class TestBoundReference { // s1: struct(s2: struct(s3: struct(..., sn: struct(leaf: int)))) // where each s{i} is an optional struct if optionalList.get(i) is true and a required struct if // false - private static Schema buildSchemaFromoptionalList(List optionalList, String leafName) { + private static Schema buildSchemaFromOptionalList(List optionalList, String leafName) { if (optionalList == null || optionalList.isEmpty()) { throw new IllegalArgumentException("optionalList must not be empty"); } From 03fda5f5261170483d48533a56223c703e631b53 Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Fri, 15 Aug 2025 16:04:31 -0400 Subject: [PATCH 08/12] fix typo --- .../java/org/apache/iceberg/expressions/TestBoundReference.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java index 96282e3b6a0f..f19a455ec3ee 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java @@ -92,7 +92,7 @@ private static Stream producesNullCases() { @MethodSource("producesNullCases") public void testProducesNull(List optionalList, boolean expectedProducesNull) { String leafName = "leaf"; - Schema schema = buildSchemaFromoptionalList(optionalList, leafName); + Schema schema = buildSchemaFromOptionalList(optionalList, leafName); int leafId = optionalList.size(); Types.NestedField leafField = schema.findField(leafId); Accessor accessor = schema.accessorForField(leafId); From f6563c75aaf05bf7c96ed74ab1121b51083172ee Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Sun, 17 Aug 2025 15:16:25 -0400 Subject: [PATCH 09/12] move the spark test to TestSelect class --- .../iceberg/spark/sql/TestFilterPushDown.java | 22 ------------------- .../apache/iceberg/spark/sql/TestSelect.java | 17 ++++++++++++++ .../iceberg/spark/sql/TestFilterPushDown.java | 22 ------------------- .../apache/iceberg/spark/sql/TestSelect.java | 17 ++++++++++++++ .../iceberg/spark/sql/TestFilterPushDown.java | 22 ------------------- .../apache/iceberg/spark/sql/TestSelect.java | 17 ++++++++++++++ 6 files changed, 51 insertions(+), 66 deletions(-) diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 88b4ac648db7..9d2ce2b388a2 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -578,28 +578,6 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { ImmutableList.of(row(4, Double.NEGATIVE_INFINITY))); } - @TestTemplate - public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { - sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" - + "USING iceberg ", - tableName); - configurePlanningMode(planningMode); - - sql("INSERT INTO %s VALUES (0, NULL)", tableName); - sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); - - checkOnlyIcebergFilters( - "address.street is null" /* query predicate */, - "address.street IS NULL" /* Iceberg scan filters */, - ImmutableList.of(row(0, null))); - - checkOnlyIcebergFilters( - "address.street is not null" /* query predicate */, - "address.street IS NOT NULL" /* Iceberg scan filters */, - ImmutableList.of(row(1, row("123 Main St")))); - } - private void checkOnlyIcebergFilters( String predicate, String icebergFilters, List expectedRows) { diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java index ab151398204e..04925ff318b9 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java @@ -613,4 +613,21 @@ public void testComplexTypeFilter() { assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result); sql("DROP TABLE IF EXISTS %s", complexTypeTableName); } + + @TestTemplate + public void testRequiredNestedFieldInOptionalStructFilter() { + String nestedStructTable = tableName("nested_struct_table"); + sql( + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + + "USING iceberg ", + nestedStructTable); + sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable); + sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable); + + List result = + sql("SELECT id FROM %s WHERE address.street IS NULL", nestedStructTable); + + assertEquals("Should return all expected rows", ImmutableList.of(row(0)), result); + sql("DROP TABLE IF EXISTS %s", nestedStructTable); + } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 88b4ac648db7..9d2ce2b388a2 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -578,28 +578,6 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { ImmutableList.of(row(4, Double.NEGATIVE_INFINITY))); } - @TestTemplate - public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { - sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" - + "USING iceberg ", - tableName); - configurePlanningMode(planningMode); - - sql("INSERT INTO %s VALUES (0, NULL)", tableName); - sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); - - checkOnlyIcebergFilters( - "address.street is null" /* query predicate */, - "address.street IS NULL" /* Iceberg scan filters */, - ImmutableList.of(row(0, null))); - - checkOnlyIcebergFilters( - "address.street is not null" /* query predicate */, - "address.street IS NOT NULL" /* Iceberg scan filters */, - ImmutableList.of(row(1, row("123 Main St")))); - } - private void checkOnlyIcebergFilters( String predicate, String icebergFilters, List expectedRows) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java index 240622750fdc..81d529d75ad0 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java @@ -613,4 +613,21 @@ public void testComplexTypeFilter() { assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result); sql("DROP TABLE IF EXISTS %s", complexTypeTableName); } + + @TestTemplate + public void testRequiredNestedFieldInOptionalStructFilter() { + String nestedStructTable = tableName("nested_struct_table"); + sql( + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + + "USING iceberg ", + nestedStructTable); + sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable); + sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable); + + List result = + sql("SELECT id FROM %s WHERE address.street IS NULL", nestedStructTable); + + assertEquals("Should return all expected rows", ImmutableList.of(row(0)), result); + sql("DROP TABLE IF EXISTS %s", nestedStructTable); + } } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 88b4ac648db7..9d2ce2b388a2 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -578,28 +578,6 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { ImmutableList.of(row(4, Double.NEGATIVE_INFINITY))); } - @TestTemplate - public void testFilterPushdownWithRequiredNestedFieldInOptionalStruct() { - sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" - + "USING iceberg ", - tableName); - configurePlanningMode(planningMode); - - sql("INSERT INTO %s VALUES (0, NULL)", tableName); - sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", tableName); - - checkOnlyIcebergFilters( - "address.street is null" /* query predicate */, - "address.street IS NULL" /* Iceberg scan filters */, - ImmutableList.of(row(0, null))); - - checkOnlyIcebergFilters( - "address.street is not null" /* query predicate */, - "address.street IS NOT NULL" /* Iceberg scan filters */, - ImmutableList.of(row(1, row("123 Main St")))); - } - private void checkOnlyIcebergFilters( String predicate, String icebergFilters, List expectedRows) { diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java index 240622750fdc..81d529d75ad0 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java @@ -613,4 +613,21 @@ public void testComplexTypeFilter() { assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result); sql("DROP TABLE IF EXISTS %s", complexTypeTableName); } + + @TestTemplate + public void testRequiredNestedFieldInOptionalStructFilter() { + String nestedStructTable = tableName("nested_struct_table"); + sql( + "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" + + "USING iceberg ", + nestedStructTable); + sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable); + sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable); + + List result = + sql("SELECT id FROM %s WHERE address.street IS NULL", nestedStructTable); + + assertEquals("Should return all expected rows", ImmutableList.of(row(0)), result); + sql("DROP TABLE IF EXISTS %s", nestedStructTable); + } } From fbd792130070660a657e87731cfdeb916b03c1c1 Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Mon, 18 Aug 2025 12:37:06 -0400 Subject: [PATCH 10/12] sql formatting --- .../test/java/org/apache/iceberg/spark/sql/TestSelect.java | 4 ++-- .../test/java/org/apache/iceberg/spark/sql/TestSelect.java | 4 ++-- .../test/java/org/apache/iceberg/spark/sql/TestSelect.java | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java index 04925ff318b9..a21dfd388d1b 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java @@ -618,8 +618,8 @@ public void testComplexTypeFilter() { public void testRequiredNestedFieldInOptionalStructFilter() { String nestedStructTable = tableName("nested_struct_table"); sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" - + "USING iceberg ", + "CREATE TABLE %s (id INT NOT NULL, address STRUCT) " + + "USING iceberg", nestedStructTable); sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable); sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java index 81d529d75ad0..68b93be47960 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java @@ -618,8 +618,8 @@ public void testComplexTypeFilter() { public void testRequiredNestedFieldInOptionalStructFilter() { String nestedStructTable = tableName("nested_struct_table"); sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" - + "USING iceberg ", + "CREATE TABLE %s (id INT NOT NULL, address STRUCT) " + + "USING iceberg", nestedStructTable); sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable); sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable); diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java index 81d529d75ad0..68b93be47960 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java @@ -618,8 +618,8 @@ public void testComplexTypeFilter() { public void testRequiredNestedFieldInOptionalStructFilter() { String nestedStructTable = tableName("nested_struct_table"); sql( - "CREATE TABLE %s (id INT NOT NULL, address STRUCT)" - + "USING iceberg ", + "CREATE TABLE %s (id INT NOT NULL, address STRUCT) " + + "USING iceberg", nestedStructTable); sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable); sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable); From 740e5f13da45925e2694b725ece6e454b4023adb Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Fri, 22 Aug 2025 13:08:07 -0400 Subject: [PATCH 11/12] Update api/src/main/java/org/apache/iceberg/Accessor.java Co-authored-by: Eduard Tudenhoefner --- api/src/main/java/org/apache/iceberg/Accessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/Accessor.java b/api/src/main/java/org/apache/iceberg/Accessor.java index 9e030c708132..b8ee86dcec9a 100644 --- a/api/src/main/java/org/apache/iceberg/Accessor.java +++ b/api/src/main/java/org/apache/iceberg/Accessor.java @@ -27,5 +27,5 @@ public interface Accessor extends Serializable { Type type(); /** Returns true if the current field or any ancestor in the access path is optional. */ - boolean hasOptionalFieldInPath(); + default boolean hasOptionalFieldInPath() { return false; } } From 535c8a1707eb4f70b708073e2e18ff27f6019cbf Mon Sep 17 00:00:00 2001 From: Dejan Gvozdenac Date: Fri, 22 Aug 2025 10:26:08 -0700 Subject: [PATCH 12/12] remove api breakage and add improve tests --- .palantir/revapi.yml | 4 ---- api/src/main/java/org/apache/iceberg/Accessor.java | 4 +++- .../iceberg/expressions/TestBoundReference.java | 13 +++++++++---- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 1caf77d73af5..1c871ba30c04 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1254,10 +1254,6 @@ acceptedBreaks: new: "method void org.apache.iceberg.data.parquet.BaseParquetWriter::()" justification: "Changing deprecated code" "1.9.0": - org.apache.iceberg:iceberg-api: - - code: "java.method.addedToInterface" - new: "method boolean org.apache.iceberg.Accessor::hasOptionalFieldInPath()" - justification: "All subclasses implement the method" org.apache.iceberg:iceberg-common: - code: "java.method.visibilityReduced" old: "method R org.apache.iceberg.common.DynConstructors.Ctor::invokeChecked(java.lang.Object,\ diff --git a/api/src/main/java/org/apache/iceberg/Accessor.java b/api/src/main/java/org/apache/iceberg/Accessor.java index b8ee86dcec9a..20b09bf910c9 100644 --- a/api/src/main/java/org/apache/iceberg/Accessor.java +++ b/api/src/main/java/org/apache/iceberg/Accessor.java @@ -27,5 +27,7 @@ public interface Accessor extends Serializable { Type type(); /** Returns true if the current field or any ancestor in the access path is optional. */ - default boolean hasOptionalFieldInPath() { return false; } + default boolean hasOptionalFieldInPath() { + return false; + } } diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java index f19a455ec3ee..ed921b248f79 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java @@ -28,6 +28,7 @@ import org.apache.iceberg.Accessor; import org.apache.iceberg.Schema; import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -40,10 +41,8 @@ public class TestBoundReference { // where each s{i} is an optional struct if optionalList.get(i) is true and a required struct if // false private static Schema buildSchemaFromOptionalList(List optionalList, String leafName) { - if (optionalList == null || optionalList.isEmpty()) { - throw new IllegalArgumentException("optionalList must not be empty"); - } - + Preconditions.checkArgument( + optionalList != null && !optionalList.isEmpty(), "optionalList must not be null or empty"); Types.NestedField leaf = optionalList.get(optionalList.size() - 1) ? optional(optionalList.size(), leafName, Types.IntegerType.get()) @@ -63,6 +62,12 @@ private static Schema buildSchemaFromOptionalList(List optionalList, St } private static Stream producesNullCases() { + // the test cases specify two arguments: + // - the first is a list of booleans that indicate whether fields in the nested sequence of + // structs are optional or required. For example, [false, true, false] will construct a + // struct like s1.s2.s3 with s1 being required, s2 being optional, and s3 being required. + // - the second is a boolean that indicates whether calling producesNull() on the BoundReference + // of the leaf field should return true or false. return Stream.of( // basic fields, no struct levels Arguments.of(Arrays.asList(false), false),