diff --git a/superset/mcp_service/chart/ascii_charts.py b/superset/mcp_service/chart/ascii_charts.py index b5aa4d71f491..d8af0be3d6eb 100644 --- a/superset/mcp_service/chart/ascii_charts.py +++ b/superset/mcp_service/chart/ascii_charts.py @@ -79,7 +79,12 @@ def _generate_ascii_bar_chart(data: list[Any], width: int, height: int) -> str: label_val = None for _key, val in row.items(): - if isinstance(val, (int, float)) and numeric_val is None: + if ( + isinstance(val, (int, float)) + and not isinstance(val, bool) + and not _is_nan_value(val) + and numeric_val is None + ): numeric_val = val elif isinstance(val, str) and label_val is None: label_val = val @@ -121,7 +126,10 @@ def _generate_horizontal_bar_chart( # Calculate bar length if max_val > min_val: normalized = (value - min_val) / (max_val - min_val) - bar_length = max(1, int(normalized * max_bar_width)) + if _is_nan_value(normalized): + bar_length = 0 + else: + bar_length = max(1, int(normalized * max_bar_width)) else: bar_length = 1 @@ -164,7 +172,10 @@ def _generate_vertical_bar_chart( # noqa: C901 for col, value in enumerate(values): if max_val > min_val: normalized = (value - min_val) / (max_val - min_val) - bar_height = max(1, int(normalized * chart_height)) + if _is_nan_value(normalized): + bar_height = 0 + else: + bar_height = max(1, int(normalized * chart_height)) else: bar_height = 1 @@ -274,7 +285,12 @@ def _extract_time_series_data(data: list[Any]) -> tuple[list[float], list[str]]: label_val = None for key, val in row.items(): - if isinstance(val, (int, float)) and numeric_val is None: + if ( + isinstance(val, (int, float)) + and not isinstance(val, bool) + and not _is_nan_value(val) + and numeric_val is None + ): numeric_val = val elif isinstance(val, str) and label_val is None: # Use the key name if it looks like a date/time field @@ -520,9 +536,11 @@ def _extract_scatter_data( if data and isinstance(data[0], dict): # Find the first two numeric columns for key, val in data[0].items(): - if isinstance(val, (int, float)) and not ( - isinstance(val, float) and (val != val) - ): # Exclude NaN + if ( + isinstance(val, (int, float)) + and not isinstance(val, bool) + and not _is_nan_value(val) + ): numeric_columns.append(key) if len(numeric_columns) >= 2: @@ -534,15 +552,14 @@ def _extract_scatter_data( if isinstance(row, dict): x_val = row.get(x_column) y_val = row.get(y_column) - # Check for valid numbers (not NaN) if ( isinstance(x_val, (int, float)) + and not isinstance(x_val, bool) + and not _is_nan_value(x_val) and isinstance(y_val, (int, float)) - and not ( - isinstance(x_val, float) and (x_val != x_val) - ) # Not NaN - and not (isinstance(y_val, float) and (y_val != y_val)) - ): # Not NaN + and not isinstance(y_val, bool) + and not _is_nan_value(y_val) + ): x_values.append(x_val) y_values.append(y_val) diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py new file mode 100644 index 000000000000..45371f0f07a8 --- /dev/null +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -0,0 +1,175 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Unit tests for ascii_charts.py NaN/null value handling.""" + +from superset.mcp_service.chart.ascii_charts import generate_ascii_chart + + +def test_bar_chart_with_null_values_does_not_raise() -> None: + """Bar chart renderer must not crash when dataset rows contain NaN values.""" + data = [ + {"category": "A", "value": 10.0}, + {"category": "B", "value": float("nan")}, + {"category": "C", "value": 30.0}, + ] + result = generate_ascii_chart(data, "bar") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_bar_chart_with_all_null_values_returns_fallback() -> None: + """Bar chart with no valid numeric rows should return the no-data fallback.""" + data = [ + {"category": "A", "value": float("nan")}, + {"category": "B", "value": float("nan")}, + ] + result = generate_ascii_chart(data, "bar") + assert isinstance(result, str) + assert result == "No numeric data found for bar chart" + + +def test_line_chart_with_null_values_does_not_raise() -> None: + """Line chart renderer must not crash when dataset rows contain NaN values.""" + data = [ + {"date": "2024-01", "sales": 100.0}, + {"date": "2024-02", "sales": float("nan")}, + {"date": "2024-03", "sales": 300.0}, + ] + result = generate_ascii_chart(data, "line") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_horizontal_bar_chart_nan_rows_are_skipped() -> None: + """NaN rows must be silently skipped; valid rows render normally.""" + # Use labels longer than 8 chars to force horizontal layout, where full + # label text is preserved (vertical layout truncates to 3 chars). + data = [ + {"label": "Alpha Category", "amount": 50.0}, + {"label": "Beta Category", "amount": float("nan")}, + {"label": "Gamma Category", "amount": 150.0}, + ] + result = generate_ascii_chart(data, "bar") + # avg label length is 14 (> 8 threshold), so horizontal layout is chosen; + # horizontal mode preserves full label text unlike vertical (3-char truncation). + assert "Horizontal Bar Chart" in result + # Both valid labels must appear in full; the NaN row (Beta) must be absent + assert "Alpha" in result + assert "Gamma" in result + assert "Beta" not in result + + +def test_column_chart_with_null_values_does_not_raise() -> None: + """Column (vertical bar) chart must not crash on NaN values.""" + data = [ + {"x": "Q1", "y": 10.0}, + {"x": "Q2", "y": float("nan")}, + {"x": "Q3", "y": 30.0}, + {"x": "Q4", "y": 40.0}, + ] + result = generate_ascii_chart(data, "column") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_timeseries_bar_with_null_values_does_not_raise() -> None: + """echarts_timeseries_bar chart type must not crash on NaN values.""" + data = [ + {"ts": "2024-01-01", "count": 5.0}, + {"ts": "2024-01-02", "count": float("nan")}, + {"ts": "2024-01-03", "count": 15.0}, + ] + result = generate_ascii_chart(data, "echarts_timeseries_bar") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_chart_with_none_values_does_not_raise() -> None: + """None (SQL NULL) values should be treated identically to NaN.""" + data = [ + {"name": "X", "metric": 100.0}, + {"name": "Y", "metric": None}, + {"name": "Z", "metric": 200.0}, + ] + result = generate_ascii_chart(data, "bar") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_bar_chart_skips_boolean_columns() -> None: + """Boolean fields must not be selected as the numeric metric. + + bool is a subclass of int, so isinstance(True, (int, float)) is True. + Without an explicit bool guard the extractor would lock onto a boolean + column (e.g. is_active=True -> 1) and ignore the real numeric metric. + """ + data = [ + {"label": "Alpha Category", "is_active": True, "revenue": 500.0}, + {"label": "Beta Category", "is_active": False, "revenue": 1500.0}, + {"label": "Gamma Category", "is_active": True, "revenue": 1000.0}, + ] + result = generate_ascii_chart(data, "bar") + # If booleans are correctly skipped, revenue (500/1500/1000) drives the + # bars. The max value is 1500, so we expect at least one K-formatted value. + assert "1.5K" in result or "1500" in result or "1.0K" in result + # The scale min/max would be "0.0" and "1.0" only if booleans were chosen; + # with revenue selected the scale starts at 500 (never "Scale: 0.0"). + assert "Scale: 0.0" not in result + + +def test_line_chart_skips_boolean_columns() -> None: + """Boolean fields must not be selected as numeric points in line charts.""" + data = [ + {"date": "2024-01", "is_active": True, "sales": 100.0}, + {"date": "2024-02", "is_active": False, "sales": 200.0}, + {"date": "2024-03", "is_active": True, "sales": 300.0}, + ] + result = generate_ascii_chart(data, "line") + assert isinstance(result, str) + assert len(result) > 0 + # If booleans were selected, the range would be 0-1; if revenue is + # selected the range includes values up to 300. + assert "300" in result or "200" in result + + +def test_scatter_chart_with_nan_values_does_not_raise() -> None: + """Scatter chart renderer must not crash when dataset rows contain NaN values.""" + data = [ + {"x": 1.0, "y": 2.0}, + {"x": float("nan"), "y": 4.0}, + {"x": 5.0, "y": float("nan")}, + {"x": 7.0, "y": 8.0}, + ] + result = generate_ascii_chart(data, "scatter") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_scatter_chart_skips_boolean_columns() -> None: + """Boolean fields must not be selected as X/Y axes in scatter charts.""" + data = [ + {"is_active": True, "x": 10.0, "y": 20.0}, + {"is_active": False, "x": 30.0, "y": 40.0}, + {"is_active": True, "x": 50.0, "y": 60.0}, + ] + result = generate_ascii_chart(data, "scatter") + # If booleans are correctly skipped, x/y (10-50 / 20-60) drive the axes; + # boolean-driven axes would be confined to 0-1. + assert isinstance(result, str) + assert len(result) > 0 + assert "10" in result or "30" in result or "50" in result