Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,15 @@ protected void processTable(String tableNameWithType, Context context) {
TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType);
updateTableConfigMetrics(tableNameWithType, tableConfig, context);
updateSegmentMetrics(tableNameWithType, tableConfig, context);
updateTableSizeMetrics(tableNameWithType);
// Skip the /table/{name}/size scatter-gather when the table can't return useful size data. Two cases:
// - Disabled table: servers do not load segments, every server responds 404, controller logs flood with
// ERROR entries (issue #14279).
// - Null IdealState: no servers known to query; getServerToSegmentsMap throws IllegalStateException
// which propagates to the outer catch as an ERROR + stack trace per table per cycle.
// updateSegmentMetrics populates context._tablesToSkipSizeAggregation in both branches.
if (!context._tablesToSkipSizeAggregation.contains(tableNameWithType)) {
updateTableSizeMetrics(tableNameWithType);
}
} catch (Exception e) {
LOGGER.error("Caught exception while updating segment status for table {}", tableNameWithType, e);
// Remove the metric for this table
Expand Down Expand Up @@ -215,6 +223,7 @@ private void updateSegmentMetrics(String tableNameWithType, TableConfig tableCon
if (idealState == null) {
LOGGER.warn("Table {} has null ideal state. Skipping segment status checks", tableNameWithType);
removeMetricsForTable(tableNameWithType);
context._tablesToSkipSizeAggregation.add(tableNameWithType);
return;
}

Expand All @@ -224,6 +233,7 @@ private void updateSegmentMetrics(String tableNameWithType, TableConfig tableCon
}
removeMetricsForTable(tableNameWithType);
context._disabledTables.add(tableNameWithType);
context._tablesToSkipSizeAggregation.add(tableNameWithType);
return;
}

Expand Down Expand Up @@ -528,6 +538,9 @@ public static final class Context {
private final Map<String, Integer> _tierBackendTableCountMap = new HashMap<>();
private final Set<String> _processedTables = new HashSet<>();
private final Set<String> _disabledTables = new HashSet<>();
// Superset of _disabledTables: tables for which updateTableSizeMetrics should be skipped because
// /table/{name}/size cannot return useful data (disabled tables, or tables with null IdealState).
private final Set<String> _tablesToSkipSizeAggregation = new HashSet<>();
private final Set<String> _pausedTables = new HashSet<>();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,13 @@
import org.testng.annotations.Test;

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
Expand Down Expand Up @@ -145,7 +148,8 @@ private SegmentZKMetadata mockPushedSegmentZKMetadata(long sizeInBytes, long pus
return segmentZKMetadata;
}

private void runSegmentStatusChecker(PinotHelixResourceManager resourceManager, int waitForPushTimeInSeconds) {
private TableSizeReader runSegmentStatusChecker(PinotHelixResourceManager resourceManager,
int waitForPushTimeInSeconds) {
LeadControllerManager leadControllerManager = mock(LeadControllerManager.class);
when(leadControllerManager.isLeaderForTable(anyString())).thenReturn(true);
ControllerConf controllerConf = mock(ControllerConf.class);
Expand All @@ -156,6 +160,7 @@ private void runSegmentStatusChecker(PinotHelixResourceManager resourceManager,
tableSizeReader);
segmentStatusChecker.start();
segmentStatusChecker.run();
return tableSizeReader;
}

private void verifyControllerMetrics(String tableNameWithType, int expectedReplicationFromConfig,
Expand Down Expand Up @@ -687,7 +692,8 @@ public void noSegmentZKMetadataTest() {
}

@Test
public void disabledTableTest() {
public void disabledTableTest()
throws Exception {
IdealState idealState = new IdealState(OFFLINE_TABLE_NAME);
// disable table in idealstate
idealState.enable(false);
Expand All @@ -701,9 +707,28 @@ public void disabledTableTest() {
when(resourceManager.getAllTables()).thenReturn(List.of(OFFLINE_TABLE_NAME));
when(resourceManager.getTableIdealState(OFFLINE_TABLE_NAME)).thenReturn(idealState);

runSegmentStatusChecker(resourceManager, 0);
TableSizeReader tableSizeReader = runSegmentStatusChecker(resourceManager, 0);
assertEquals(MetricValueUtils.getGlobalGaugeValue(_controllerMetrics, ControllerGauge.DISABLED_TABLE_COUNT), 1);
verifyControllerMetricsNotExist();
// Disabled tables should not trigger the size scatter-gather: servers do not load segments for disabled tables,
// so calling /table/{name}/size would return 404 from every server and flood logs (issue #14279).
verify(tableSizeReader, never()).getTableSizeDetails(anyString(), anyInt(), anyBoolean());
}

@Test
public void nullIdealStateTest()
throws Exception {
PinotHelixResourceManager resourceManager = mock(PinotHelixResourceManager.class);
when(resourceManager.getAllTables()).thenReturn(List.of(OFFLINE_TABLE_NAME));
when(resourceManager.getTableIdealState(OFFLINE_TABLE_NAME)).thenReturn(null);

TableSizeReader tableSizeReader = runSegmentStatusChecker(resourceManager, 0);
// Tables with null IdealState should not trigger the size scatter-gather either: getServerToSegmentsMap throws
// IllegalStateException when IdealState is missing, which would propagate to processTable's outer catch as an
// ERROR + stack trace per table per cycle.
verify(tableSizeReader, never()).getTableSizeDetails(anyString(), anyInt(), anyBoolean());
// Disabled-table count should not be inflated by null-IdealState entries.
assertEquals(MetricValueUtils.getGlobalGaugeValue(_controllerMetrics, ControllerGauge.DISABLED_TABLE_COUNT), 0);
}

@Test
Expand Down