diff --git a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java index 435f79129204..e7679bfe6f69 100644 --- a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java +++ b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java @@ -134,6 +134,7 @@ public static TableMetadata replacePaths( snapshotId, newSnapshots, null, + null, metadata.snapshotLog(), metadataLogEntries, metadata.refs(), diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 43a67dd2bef2..824c1b40b414 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -259,18 +259,19 @@ public String toString() { private final Map schemasById; private final Map specsById; private final Map sortOrdersById; - private final List snapshotLog; + private volatile List snapshotLog; private final List previousFiles; private final List statisticsFiles; private final List partitionStatisticsFiles; private final List changes; private final long nextRowId; private final List encryptionKeys; - private SerializableSupplier> snapshotsSupplier; + @Deprecated private SerializableSupplier> snapshotsSupplier; + private SerializableSupplier deferredMetadataSupplier; private volatile List snapshots; private volatile Map snapshotsById; private volatile Map refs; - private volatile boolean snapshotsLoaded; + private volatile boolean deferredLoaded; @SuppressWarnings("checkstyle:CyclomaticComplexity") TableMetadata( @@ -292,6 +293,7 @@ public String toString() { long currentSnapshotId, List snapshots, SerializableSupplier> snapshotsSupplier, + SerializableSupplier deferredMetadataSupplier, List snapshotLog, List previousFiles, Map refs, @@ -319,6 +321,9 @@ public String toString() { metadataFileLocation == null || changes.isEmpty(), "Cannot create TableMetadata with a metadata location and changes"); Preconditions.checkArgument(encryptionKeys != null, "Encryption keys cannot be null"); + Preconditions.checkArgument( + snapshotsSupplier == null || deferredMetadataSupplier == null, + "Cannot set both snapshotsSupplier and deferredMetadataSupplier"); this.metadataFileLocation = metadataFileLocation; this.formatVersion = formatVersion; @@ -338,7 +343,8 @@ public String toString() { this.currentSnapshotId = currentSnapshotId; this.snapshots = snapshots; this.snapshotsSupplier = snapshotsSupplier; - this.snapshotsLoaded = snapshotsSupplier == null; + this.deferredMetadataSupplier = deferredMetadataSupplier; + this.deferredLoaded = snapshotsSupplier == null && deferredMetadataSupplier == null; this.snapshotLog = snapshotLog; this.previousFiles = previousFiles; this.encryptionKeys = encryptionKeys; @@ -515,7 +521,7 @@ public long propertyAsLong(String property, long defaultValue) { public Snapshot snapshot(long snapshotId) { if (!snapshotsById.containsKey(snapshotId)) { - ensureSnapshotsLoaded(); + ensureDeferredLoaded(); } return snapshotsById.get(snapshotId); @@ -526,14 +532,24 @@ public Snapshot currentSnapshot() { } public List snapshots() { - ensureSnapshotsLoaded(); + ensureDeferredLoaded(); return snapshots; } - private synchronized void ensureSnapshotsLoaded() { - if (!snapshotsLoaded) { - List loadedSnapshots = Lists.newArrayList(snapshotsSupplier.get()); + private synchronized void ensureDeferredLoaded() { + if (!deferredLoaded) { + List loadedSnapshots; + if (deferredMetadataSupplier != null) { + TableMetadata fullMetadata = deferredMetadataSupplier.get(); + loadedSnapshots = Lists.newArrayList(fullMetadata.snapshots()); + this.snapshotLog = ImmutableList.copyOf(fullMetadata.snapshotLog()); + this.deferredMetadataSupplier = null; + } else { + loadedSnapshots = Lists.newArrayList(snapshotsSupplier.get()); + this.snapshotsSupplier = null; + } + loadedSnapshots.removeIf(s -> s.sequenceNumber() > lastSequenceNumber); this.snapshots = ImmutableList.copyOf(loadedSnapshots); @@ -542,8 +558,7 @@ private synchronized void ensureSnapshotsLoaded() { this.refs = validateRefs(currentSnapshotId, refs, snapshotsById); - this.snapshotsLoaded = true; - this.snapshotsSupplier = null; + this.deferredLoaded = true; } } @@ -564,6 +579,7 @@ public List partitionStatisticsFiles() { } public List snapshotLog() { + ensureDeferredLoaded(); return snapshotLog; } @@ -913,6 +929,7 @@ public static class Builder { private long currentSnapshotId; private List snapshots; private SerializableSupplier> snapshotsSupplier; + private SerializableSupplier deferredMetadataSupplier; private final Map refs; private final Map> statisticsFiles; private final Map> partitionStatisticsFiles; @@ -993,7 +1010,7 @@ private Builder(TableMetadata base) { this.changes = Lists.newArrayList(base.changes); this.startingChangeCount = changes.size(); - this.snapshotLog = Lists.newArrayList(base.snapshotLog); + this.snapshotLog = Lists.newArrayList(base.snapshotLog()); this.previousFileLocation = base.metadataFileLocation; this.previousFiles = base.previousFiles; this.refs = Maps.newHashMap(base.refs); @@ -1278,11 +1295,31 @@ public Builder addSnapshot(Snapshot snapshot) { return this; } + /** + * Lazily loads table snapshots + * + * @param snapshotsSupplier supplier that lazily loads snapshots + * @return this for method chaining + * @deprecated use {@link #setDeferredMetadataSupplier(SerializableSupplier)} instead. + */ + @Deprecated public Builder setSnapshotsSupplier(SerializableSupplier> snapshotsSupplier) { this.snapshotsSupplier = snapshotsSupplier; return this; } + /** + * Lazily loads complete table metadata + * + * @param deferredMetadataSupplier supplier that lazily loads complete TableMetadata + * @return this for method chaining + */ + public Builder setDeferredMetadataSupplier( + SerializableSupplier deferredMetadataSupplier) { + this.deferredMetadataSupplier = deferredMetadataSupplier; + return this; + } + public Builder setBranchSnapshot(Snapshot snapshot, String branch) { addSnapshot(snapshot); setBranchSnapshotInternal(snapshot, branch); @@ -1385,6 +1422,7 @@ public Builder suppressHistoricalSnapshots() { refs.values().stream().map(SnapshotRef::snapshotId).collect(Collectors.toSet()); Set suppressedSnapshotIds = Sets.difference(snapshotsById.keySet(), refSnapshotIds); rewriteSnapshotsInternal(suppressedSnapshotIds, true); + this.snapshotLog.removeIf(entry -> !refSnapshotIds.contains(entry.snapshotId())); return this; } @@ -1528,7 +1566,8 @@ private boolean hasChanges() { || (discardChanges && !changes.isEmpty()) || metadataLocation != null || suppressHistoricalSnapshots - || null != snapshotsSupplier; + || null != snapshotsSupplier + || null != deferredMetadataSupplier; } public TableMetadata build() { @@ -1583,6 +1622,7 @@ public TableMetadata build() { currentSnapshotId, ImmutableList.copyOf(snapshots), snapshotsSupplier, + deferredMetadataSupplier, ImmutableList.copyOf(newSnapshotLog), ImmutableList.copyOf(metadataHistory), ImmutableMap.copyOf(refs), diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index eeeeeab8a699..0396de9d0576 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -573,6 +573,7 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { currentSnapshotId, snapshots, null, + null, entries.build(), metadataEntries.build(), refs, diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index c7b5b5d41c74..1036f055e989 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -529,11 +529,10 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) { TableMetadata.buildFrom(response.tableMetadata()) .withMetadataLocation(response.metadataLocation()) .setPreviousFileLocation(null) - .setSnapshotsSupplier( + .setDeferredMetadataSupplier( () -> loadInternal(context, finalIdentifier, SnapshotMode.ALL, Map.of(), h -> {}) - .tableMetadata() - .snapshots()) + .tableMetadata()) .discardChanges() .build(); } else { diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotLoading.java b/core/src/test/java/org/apache/iceberg/TestSnapshotLoading.java index 97fb7d62d7ec..fc2f2bc3f864 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotLoading.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotLoading.java @@ -192,6 +192,140 @@ public void testBuildingNewMetadataTriggersSnapshotLoad() { verify(snapshotsSupplierMock, times(1)).get(); } + @TestTemplate + public void testDeferredMetadataSnapshotsAreLoadedOnce() { + SerializableSupplier mock = mockDeferredMetadataSupplier(); + TableMetadata deferred = deferredTableMetadata(mock); + + deferred.snapshots(); + deferred.snapshots(); + deferred.snapshots(); + + verify(mock, times(1)).get(); + + assertThat(deferred.snapshots()).containsExactlyElementsOf(originalTableMetadata.snapshots()); + } + + @TestTemplate + public void testDeferredMetadataCurrentSnapshotDoesNotLoad() { + SerializableSupplier mock = mockDeferredMetadataSupplier(); + TableMetadata deferred = deferredTableMetadata(mock); + + deferred.currentSnapshot(); + deferred.snapshot(deferred.ref(SnapshotRef.MAIN_BRANCH).snapshotId()); + + verify(mock, times(0)).get(); + } + + @TestTemplate + public void testDeferredMetadataSnapshotLogLoadedOnce() { + SerializableSupplier mock = mockDeferredMetadataSupplier(); + TableMetadata deferred = deferredTableMetadata(mock); + + deferred.snapshotLog(); + deferred.snapshotLog(); + + verify(mock, times(1)).get(); + + assertThat(deferred.snapshotLog()) + .containsExactlyElementsOf(originalTableMetadata.snapshotLog()); + } + + @TestTemplate + public void testDeferredMetadataSnapshotLogNotLoadedUntilAccessed() { + SerializableSupplier mock = mockDeferredMetadataSupplier(); + TableMetadata deferred = deferredTableMetadata(mock); + + deferred.currentSnapshot(); + + verify(mock, times(0)).get(); + + deferred.snapshotLog(); + + verify(mock, times(1)).get(); + + assertThat(deferred.snapshotLog()) + .containsExactlyElementsOf(originalTableMetadata.snapshotLog()); + } + + @TestTemplate + public void testDeferredMetadataSnapshotsLoadAlsoPopulatesSnapshotLog() { + SerializableSupplier mock = mockDeferredMetadataSupplier(); + TableMetadata deferred = deferredTableMetadata(mock); + + deferred.snapshots(); + + verify(mock, times(1)).get(); + + deferred.snapshotLog(); + + verify(mock, times(1)).get(); + + assertThat(deferred.snapshotLog()) + .containsExactlyElementsOf(originalTableMetadata.snapshotLog()); + } + + @TestTemplate + public void testDeferredMetadataBuildFromPreservesSnapshotLog() { + SerializableSupplier mock = mockDeferredMetadataSupplier(); + TableMetadata deferred = deferredTableMetadata(mock); + + TableMetadata rebuilt = TableMetadata.buildFrom(deferred).discardChanges().build(); + + verify(mock, times(1)).get(); + + assertThat(rebuilt.snapshotLog()) + .containsExactlyElementsOf(originalTableMetadata.snapshotLog()); + } + + @TestTemplate + public void testSuppressHistoricalSnapshotsFiltersSnapshotLog() { + TableMetadata suppressed = + TableMetadata.buildFrom(originalTableMetadata) + .suppressHistoricalSnapshots() + .discardChanges() + .build(); + + assertThat(suppressed.snapshots()).hasSize(1); + assertThat(suppressed.snapshotLog()).hasSize(1); + assertThat(suppressed.snapshotLog().get(0).snapshotId()) + .isEqualTo(currentSnapshot.snapshotId()); + } + + @TestTemplate + public void testCannotSetBothSuppliers() { + assertThatThrownBy( + () -> + TableMetadata.buildFrom(originalTableMetadata) + .setSnapshotsSupplier(ImmutableList::of) + .setDeferredMetadataSupplier(() -> originalTableMetadata) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot set both snapshotsSupplier and deferredMetadataSupplier"); + } + + private SerializableSupplier mockDeferredMetadataSupplier() { + return Mockito.spy( + new SerializableSupplier() { + @Override + public TableMetadata get() { + return originalTableMetadata; + } + }); + } + + private TableMetadata deferredTableMetadata( + SerializableSupplier deferredMetadataSupplier) { + return TableMetadata.buildFrom(originalTableMetadata) + .removeSnapshots( + allSnapshots.stream() + .filter(Predicate.isEqual(currentSnapshot).negate()) + .collect(Collectors.toList())) + .setDeferredMetadataSupplier(deferredMetadataSupplier) + .discardChanges() + .build(); + } + private static class MetadataTableOperations implements TableOperations { private final FileIO io; private final TableMetadata currentMetadata; diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index cb1decd2d8dc..a08934a21899 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -199,6 +199,7 @@ public void testJsonConversion() throws Exception { currentSnapshotId, Arrays.asList(previousSnapshot, currentSnapshot), null, + null, snapshotLog, ImmutableList.of(), refs, @@ -304,6 +305,7 @@ public void testBackwardCompat() throws Exception { currentSnapshotId, Arrays.asList(previousSnapshot, currentSnapshot), null, + null, ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), @@ -429,6 +431,7 @@ public void testInvalidMainBranch() throws IOException { currentSnapshotId, Arrays.asList(previousSnapshot, currentSnapshot), null, + null, snapshotLog, ImmutableList.of(), refs, @@ -477,6 +480,7 @@ public void testMainWithoutCurrent() throws IOException { -1, ImmutableList.of(snapshot), null, + null, ImmutableList.of(), ImmutableList.of(), refs, @@ -519,6 +523,7 @@ public void testBranchSnapshotMissing() { -1, ImmutableList.of(), null, + null, ImmutableList.of(), ImmutableList.of(), refs, @@ -638,6 +643,7 @@ public void testJsonWithPreviousMetadataLog() throws Exception { currentSnapshotId, Arrays.asList(previousSnapshot, currentSnapshot), null, + null, reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), @@ -729,6 +735,7 @@ public void testAddPreviousMetadataRemoveNone() throws IOException { currentSnapshotId, Arrays.asList(previousSnapshot, currentSnapshot), null, + null, reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), @@ -835,6 +842,7 @@ public void testAddPreviousMetadataRemoveOne() throws IOException { currentSnapshotId, Arrays.asList(previousSnapshot, currentSnapshot), null, + null, reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), @@ -945,6 +953,7 @@ public void testAddPreviousMetadataRemoveMultiple() throws IOException { currentSnapshotId, Arrays.asList(previousSnapshot, currentSnapshot), null, + null, reversedSnapshotLog, ImmutableList.copyOf(previousMetadataLog), ImmutableMap.of(), @@ -993,6 +1002,7 @@ public void testV2UUIDValidation() { -1L, ImmutableList.of(), null, + null, ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), @@ -1030,6 +1040,7 @@ public void testVersionValidation() { -1L, ImmutableList.of(), null, + null, ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), @@ -1078,6 +1089,7 @@ public void testVersionValidation() { -1L, ImmutableList.of(), null, + null, ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(),