diff --git a/pyproject.toml b/pyproject.toml index fbbd8d82b7a0..0b586403c214 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -377,6 +377,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.lint.per-file-ignores] "superset/mcp_service/app.py" = ["S608", "E501"] # LLM instruction text: SQL examples (S608) and long lines in multiline string (E501) +"superset/mcp_service/*/tool/list_*.py" = ["E501"] # LLM docstring examples show full request shapes which exceed line length "scripts/*" = ["TID251"] "setup.py" = ["TID251"] "superset/config.py" = ["TID251"] diff --git a/superset/mcp_service/chart/schemas.py b/superset/mcp_service/chart/schemas.py index 1fdb4f43ab6d..d9f1c6b97e3b 100644 --- a/superset/mcp_service/chart/schemas.py +++ b/superset/mcp_service/chart/schemas.py @@ -537,8 +537,11 @@ class ChartFilter(ColumnOperator): "datasource_name", ] = Field( ..., - description="Column to filter on. Use get_schema(model_type='chart') for " - "available filter columns.", + description=( + "Column to filter on. Valid values: 'slice_name', 'viz_type', " + "'datasource_name'. Other column names are not valid filter columns " + "and will cause a validation error." + ), ) opr: ColumnOperatorEnum = Field( ..., diff --git a/superset/mcp_service/chart/tool/list_charts.py b/superset/mcp_service/chart/tool/list_charts.py index 2d53afc8d30b..bfb2c5645400 100644 --- a/superset/mcp_service/chart/tool/list_charts.py +++ b/superset/mcp_service/chart/tool/list_charts.py @@ -91,8 +91,24 @@ async def list_charts( Returns chart metadata including id, name, viz_type, URL, and last modified time. - Sortable columns for order_column: id, slice_name, viz_type, description, - changed_on, created_on + **IMPORTANT**: All parameters must be wrapped in a ``request`` object. + Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level + keyword arguments — they will be rejected. Use the ``request`` wrapper:: + + # Correct usage + list_charts(request={"search": "revenue", "page": 1, "page_size": 10}) + list_charts(request={"filters": [{"col": "slice_name", "opr": "sw", "value": "sales"}]}) + list_charts() # no arguments returns first page with defaults + + # Wrong — causes pydantic validation errors + list_charts(search="revenue", page=1) # DO NOT DO THIS + + Valid filter columns for ``filters[].col``: + ``slice_name``, ``viz_type``, ``datasource_name`` + + Sortable columns for ``order_column``: + ``id``, ``slice_name``, ``viz_type``, ``description``, + ``changed_on``, ``created_on`` """ request = request or _DEFAULT_LIST_CHARTS_REQUEST.model_copy(deep=True) await ctx.info( diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py index 8a92d5858960..68a680863e54 100644 --- a/superset/mcp_service/dashboard/schemas.py +++ b/superset/mcp_service/dashboard/schemas.py @@ -176,8 +176,9 @@ class DashboardFilter(ColumnOperator): ] = Field( ..., description=( - "Column to filter on. Use " - "get_schema(model_type='dashboard') for available filter columns." + "Column to filter on. Valid values: 'dashboard_title', 'published', " + "'favorite'. Other column names are not valid filter columns and will " + "cause a validation error." ), ) opr: ColumnOperatorEnum = Field( diff --git a/superset/mcp_service/dashboard/tool/list_dashboards.py b/superset/mcp_service/dashboard/tool/list_dashboards.py index b1bc664c5934..8c479df27532 100644 --- a/superset/mcp_service/dashboard/tool/list_dashboards.py +++ b/superset/mcp_service/dashboard/tool/list_dashboards.py @@ -85,8 +85,24 @@ async def list_dashboards( including title, slug, URL, and last modified time. Use select_columns to request additional fields. - Sortable columns for order_column: id, dashboard_title, slug, published, - changed_on, created_on + **IMPORTANT**: All parameters must be wrapped in a ``request`` object. + Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level + keyword arguments — they will be rejected. Use the ``request`` wrapper:: + + # Correct usage + list_dashboards(request={"search": "sales", "page": 1, "page_size": 10}) + list_dashboards(request={"filters": [{"col": "dashboard_title", "opr": "sw", "value": "exec"}]}) + list_dashboards() # no arguments returns first page with defaults + + # Wrong — causes pydantic validation errors + list_dashboards(search="sales", page=1) # DO NOT DO THIS + + Valid filter columns for ``filters[].col``: + ``dashboard_title``, ``published``, ``favorite`` + + Sortable columns for ``order_column``: + ``id``, ``dashboard_title``, ``slug``, ``published``, + ``changed_on``, ``created_on`` """ request = request or _DEFAULT_LIST_DASHBOARDS_REQUEST.model_copy(deep=True) await ctx.info( diff --git a/superset/mcp_service/dataset/schemas.py b/superset/mcp_service/dataset/schemas.py index 9d706c84152f..b5aa475ff2e0 100644 --- a/superset/mcp_service/dataset/schemas.py +++ b/superset/mcp_service/dataset/schemas.py @@ -71,8 +71,11 @@ class DatasetFilter(ColumnOperator): "database_name", ] = Field( ..., - description="Column to filter on. Use get_schema(model_type='dataset') for " - "available filter columns.", + description=( + "Column to filter on. Valid values: 'table_name', 'schema', " + "'database_name'. Other column names (e.g. 'created_by_fk', 'id') " + "are not valid filter columns and will cause a validation error." + ), ) opr: ColumnOperatorEnum = Field( ..., diff --git a/superset/mcp_service/dataset/tool/list_datasets.py b/superset/mcp_service/dataset/tool/list_datasets.py index 84b7a71d15f9..099c15231ea6 100644 --- a/superset/mcp_service/dataset/tool/list_datasets.py +++ b/superset/mcp_service/dataset/tool/list_datasets.py @@ -96,8 +96,23 @@ async def list_datasets( Returns dataset metadata including table name, schema, and last modified time. - Sortable columns for order_column: id, table_name, schema, changed_on, - created_on + **IMPORTANT**: All parameters must be wrapped in a ``request`` object. + Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level + keyword arguments — they will be rejected. Use the ``request`` wrapper:: + + # Correct usage + list_datasets(request={"search": "sales", "page": 1, "page_size": 10}) + list_datasets(request={"filters": [{"col": "table_name", "opr": "sw", "value": "orders"}]}) + list_datasets() # no arguments returns first page with defaults + + # Wrong — causes pydantic validation errors + list_datasets(search="sales", page=1) # DO NOT DO THIS + + Valid filter columns for ``filters[].col``: + ``table_name``, ``schema``, ``database_name`` + + Sortable columns for ``order_column``: + ``id``, ``table_name``, ``schema``, ``changed_on``, ``created_on`` """ if ctx is None: raise RuntimeError("FastMCP context is required for list_datasets") diff --git a/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py b/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py index 64d31eb76e17..a2b4e1e5f98a 100644 --- a/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py +++ b/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py @@ -1349,7 +1349,8 @@ def test_sortable_columns_in_docstring(self): # Check list_dashboards docstring for sortable columns documentation assert list_dashboards.__doc__ is not None - assert "Sortable columns for order_column:" in list_dashboards.__doc__ + assert "Sortable columns for" in list_dashboards.__doc__ + assert "order_column" in list_dashboards.__doc__ for col in SORTABLE_DASHBOARD_COLUMNS: assert col in list_dashboards.__doc__ diff --git a/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py b/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py index bfe8410dde9d..25a8a4c0fc6e 100644 --- a/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py +++ b/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py @@ -1700,7 +1700,8 @@ def test_sortable_columns_in_docstring(self): # Check list_datasets docstring for sortable columns documentation assert list_datasets.__doc__ is not None - assert "Sortable columns for order_column:" in list_datasets.__doc__ + assert "Sortable columns for" in list_datasets.__doc__ + assert "order_column" in list_datasets.__doc__ for col in SORTABLE_DATASET_COLUMNS: assert col in list_datasets.__doc__ @@ -2080,3 +2081,90 @@ def test_owned_by_me_and_created_by_me_allowed(self): request = ListDatasetsRequest(owned_by_me=True, created_by_me=True) assert request.owned_by_me is True assert request.created_by_me is True + + +class TestListDatasetsRequestWrapper: + """ + Tests verifying that list_datasets requires a ``request`` wrapper object. + + LLMs sometimes pass parameters like ``search``, ``page``, or ``page_size`` + as flat top-level kwargs instead of nesting them inside a ``request`` + object. These tests confirm the correct call shape through both the Pydantic + schema and the actual MCP tool layer, and verify that invalid filter column + names (e.g. ``created_by_fk``) are rejected. + """ + + def test_request_wrapper_with_search(self) -> None: + """Parameters passed inside request= are accepted by the schema.""" + request = ListDatasetsRequest(search="sales", page=1, page_size=10) + assert request.search == "sales" + assert request.page == 1 + assert request.page_size == 10 + + def test_request_wrapper_defaults(self) -> None: + """No-arg constructor produces valid schema defaults.""" + request = ListDatasetsRequest() + assert request.search is None + assert request.page == 1 + assert request.filters == [] + + def test_dataset_filter_valid_col(self) -> None: + """Valid col values are accepted by DatasetFilter.""" + for col in ("table_name", "schema", "database_name"): + f = DatasetFilter(col=col, opr="sw", value="test") + assert f.col == col + + def test_dataset_filter_invalid_col_raises(self) -> None: + """Column names not in the Literal are rejected with a validation error. + + This guards against LLMs passing ``created_by_fk`` or similar + internal column names that are not exposed as filter fields. + """ + from pydantic import ValidationError + + for bad_col in ("created_by_fk", "id", "database_id", "owner"): + with pytest.raises(ValidationError): + DatasetFilter(col=bad_col, opr="eq", value="1") + + @patch("superset.daos.dataset.DatasetDAO.list") + @pytest.mark.asyncio + async def test_request_wrapper_enforced_by_tool( + self, mock_list, mcp_server + ) -> None: + """The MCP tool layer accepts the request wrapper and returns results. + + Verifies end-to-end that wrapping params in ``request={}`` works through + the actual FastMCP tool call, not just schema validation. + """ + mock_list.return_value = ([], 0) + async with Client(mcp_server) as client: + result = await client.call_tool( + "list_datasets", + {"request": {"search": "sales", "page": 1, "page_size": 5}}, + ) + data = json.loads(result.content[0].text) + assert data["count"] == 0 + assert data["datasets"] == [] + + @pytest.mark.asyncio + async def test_flat_kwargs_rejected(self, mcp_server) -> None: + """Passing search/page/page_size as top-level kwargs raises a ToolError + that specifically mentions the unexpected arguments. + + This is the exact failure pattern from story #105712: LLMs call + ``list_datasets(search=..., page=..., page_size=...)`` instead of + ``list_datasets(request={...})``. + """ + with pytest.raises(ToolError) as exc_info: + async with Client(mcp_server) as client: + await client.call_tool( + "list_datasets", + {"search": "sales", "page": 1, "page_size": 10}, + ) + error_text = str(exc_info.value) + # The error must call out the unexpected arguments, not some unrelated failure + assert ( + "search" in error_text + or "Unexpected" in error_text + or "request" in error_text + )