diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java index ab20f6fb7453..5454f1cd49a2 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java @@ -117,7 +117,15 @@ protected void processTable(String tableNameWithType, Context context) { TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType); updateTableConfigMetrics(tableNameWithType, tableConfig, context); updateSegmentMetrics(tableNameWithType, tableConfig, context); - updateTableSizeMetrics(tableNameWithType); + // Skip the /table/{name}/size scatter-gather when the table can't return useful size data. Two cases: + // - Disabled table: servers do not load segments, every server responds 404, controller logs flood with + // ERROR entries (issue #14279). + // - Null IdealState: no servers known to query; getServerToSegmentsMap throws IllegalStateException + // which propagates to the outer catch as an ERROR + stack trace per table per cycle. + // updateSegmentMetrics populates context._tablesToSkipSizeAggregation in both branches. + if (!context._tablesToSkipSizeAggregation.contains(tableNameWithType)) { + updateTableSizeMetrics(tableNameWithType); + } } catch (Exception e) { LOGGER.error("Caught exception while updating segment status for table {}", tableNameWithType, e); // Remove the metric for this table @@ -215,6 +223,7 @@ private void updateSegmentMetrics(String tableNameWithType, TableConfig tableCon if (idealState == null) { LOGGER.warn("Table {} has null ideal state. Skipping segment status checks", tableNameWithType); removeMetricsForTable(tableNameWithType); + context._tablesToSkipSizeAggregation.add(tableNameWithType); return; } @@ -224,6 +233,7 @@ private void updateSegmentMetrics(String tableNameWithType, TableConfig tableCon } removeMetricsForTable(tableNameWithType); context._disabledTables.add(tableNameWithType); + context._tablesToSkipSizeAggregation.add(tableNameWithType); return; } @@ -528,6 +538,9 @@ public static final class Context { private final Map _tierBackendTableCountMap = new HashMap<>(); private final Set _processedTables = new HashSet<>(); private final Set _disabledTables = new HashSet<>(); + // Superset of _disabledTables: tables for which updateTableSizeMetrics should be skipped because + // /table/{name}/size cannot return useful data (disabled tables, or tables with null IdealState). + private final Set _tablesToSkipSizeAggregation = new HashSet<>(); private final Set _pausedTables = new HashSet<>(); } } diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java index f2a949051e09..3e5a4db2e2b5 100644 --- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java +++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java @@ -58,10 +58,13 @@ import org.testng.annotations.Test; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -145,7 +148,8 @@ private SegmentZKMetadata mockPushedSegmentZKMetadata(long sizeInBytes, long pus return segmentZKMetadata; } - private void runSegmentStatusChecker(PinotHelixResourceManager resourceManager, int waitForPushTimeInSeconds) { + private TableSizeReader runSegmentStatusChecker(PinotHelixResourceManager resourceManager, + int waitForPushTimeInSeconds) { LeadControllerManager leadControllerManager = mock(LeadControllerManager.class); when(leadControllerManager.isLeaderForTable(anyString())).thenReturn(true); ControllerConf controllerConf = mock(ControllerConf.class); @@ -156,6 +160,7 @@ private void runSegmentStatusChecker(PinotHelixResourceManager resourceManager, tableSizeReader); segmentStatusChecker.start(); segmentStatusChecker.run(); + return tableSizeReader; } private void verifyControllerMetrics(String tableNameWithType, int expectedReplicationFromConfig, @@ -687,7 +692,8 @@ public void noSegmentZKMetadataTest() { } @Test - public void disabledTableTest() { + public void disabledTableTest() + throws Exception { IdealState idealState = new IdealState(OFFLINE_TABLE_NAME); // disable table in idealstate idealState.enable(false); @@ -701,9 +707,28 @@ public void disabledTableTest() { when(resourceManager.getAllTables()).thenReturn(List.of(OFFLINE_TABLE_NAME)); when(resourceManager.getTableIdealState(OFFLINE_TABLE_NAME)).thenReturn(idealState); - runSegmentStatusChecker(resourceManager, 0); + TableSizeReader tableSizeReader = runSegmentStatusChecker(resourceManager, 0); assertEquals(MetricValueUtils.getGlobalGaugeValue(_controllerMetrics, ControllerGauge.DISABLED_TABLE_COUNT), 1); verifyControllerMetricsNotExist(); + // Disabled tables should not trigger the size scatter-gather: servers do not load segments for disabled tables, + // so calling /table/{name}/size would return 404 from every server and flood logs (issue #14279). + verify(tableSizeReader, never()).getTableSizeDetails(anyString(), anyInt(), anyBoolean()); + } + + @Test + public void nullIdealStateTest() + throws Exception { + PinotHelixResourceManager resourceManager = mock(PinotHelixResourceManager.class); + when(resourceManager.getAllTables()).thenReturn(List.of(OFFLINE_TABLE_NAME)); + when(resourceManager.getTableIdealState(OFFLINE_TABLE_NAME)).thenReturn(null); + + TableSizeReader tableSizeReader = runSegmentStatusChecker(resourceManager, 0); + // Tables with null IdealState should not trigger the size scatter-gather either: getServerToSegmentsMap throws + // IllegalStateException when IdealState is missing, which would propagate to processTable's outer catch as an + // ERROR + stack trace per table per cycle. + verify(tableSizeReader, never()).getTableSizeDetails(anyString(), anyInt(), anyBoolean()); + // Disabled-table count should not be inflated by null-IdealState entries. + assertEquals(MetricValueUtils.getGlobalGaugeValue(_controllerMetrics, ControllerGauge.DISABLED_TABLE_COUNT), 0); } @Test