apache · aminghadersohi · May 6, 2026 · May 7, 2026 · May 7, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -377,6 +377,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 
 [tool.ruff.lint.per-file-ignores]
 "superset/mcp_service/app.py" = ["S608", "E501"]  # LLM instruction text: SQL examples (S608) and long lines in multiline string (E501)
+"superset/mcp_service/*/tool/list_*.py" = ["E501"]  # LLM docstring examples show full request shapes which exceed line length
 "scripts/*" = ["TID251"]
 "setup.py" = ["TID251"]
 "superset/config.py" = ["TID251"]

diff --git a/superset/mcp_service/chart/schemas.py b/superset/mcp_service/chart/schemas.py
@@ -537,8 +537,11 @@ class ChartFilter(ColumnOperator):
         "datasource_name",
     ] = Field(
         ...,
-        description="Column to filter on. Use get_schema(model_type='chart') for "
-        "available filter columns.",
+        description=(
+            "Column to filter on. Valid values: 'slice_name', 'viz_type', "
+            "'datasource_name'. Other column names are not valid filter columns "
+            "and will cause a validation error."
+        ),
     )
     opr: ColumnOperatorEnum = Field(
         ...,

diff --git a/superset/mcp_service/chart/tool/list_charts.py b/superset/mcp_service/chart/tool/list_charts.py
@@ -91,8 +91,24 @@ async def list_charts(
     Returns chart metadata including id, name, viz_type, URL, and last
     modified time.
 
-    Sortable columns for order_column: id, slice_name, viz_type, description,
-    changed_on, created_on
+    **IMPORTANT**: All parameters must be wrapped in a ``request`` object.
+    Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
+    keyword arguments — they will be rejected. Use the ``request`` wrapper::
+
+        # Correct usage
+        list_charts(request={"search": "revenue", "page": 1, "page_size": 10})
+        list_charts(request={"filters": [{"col": "slice_name", "opr": "sw", "value": "sales"}]})
+        list_charts()  # no arguments returns first page with defaults
+
+        # Wrong — causes pydantic validation errors
+        list_charts(search="revenue", page=1)  # DO NOT DO THIS
+
+    Valid filter columns for ``filters[].col``:
+        ``slice_name``, ``viz_type``, ``datasource_name``
+
+    Sortable columns for ``order_column``:
+        ``id``, ``slice_name``, ``viz_type``, ``description``,
+        ``changed_on``, ``created_on``
     """
     request = request or _DEFAULT_LIST_CHARTS_REQUEST.model_copy(deep=True)
     await ctx.info(

diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py
@@ -176,8 +176,9 @@ class DashboardFilter(ColumnOperator):
     ] = Field(
         ...,
         description=(
-            "Column to filter on. Use "
-            "get_schema(model_type='dashboard') for available filter columns."
+            "Column to filter on. Valid values: 'dashboard_title', 'published', "
+            "'favorite'. Other column names are not valid filter columns and will "
+            "cause a validation error."
         ),
     )
     opr: ColumnOperatorEnum = Field(

diff --git a/superset/mcp_service/dashboard/tool/list_dashboards.py b/superset/mcp_service/dashboard/tool/list_dashboards.py
@@ -85,8 +85,24 @@ async def list_dashboards(
     including title, slug, URL, and last modified time. Use select_columns to
     request additional fields.
 
-    Sortable columns for order_column: id, dashboard_title, slug, published,
-    changed_on, created_on
+    **IMPORTANT**: All parameters must be wrapped in a ``request`` object.
+    Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
+    keyword arguments — they will be rejected. Use the ``request`` wrapper::
+
+        # Correct usage
+        list_dashboards(request={"search": "sales", "page": 1, "page_size": 10})
+        list_dashboards(request={"filters": [{"col": "dashboard_title", "opr": "sw", "value": "exec"}]})
+        list_dashboards()  # no arguments returns first page with defaults
+
+        # Wrong — causes pydantic validation errors
+        list_dashboards(search="sales", page=1)  # DO NOT DO THIS
+
+    Valid filter columns for ``filters[].col``:
+        ``dashboard_title``, ``published``, ``favorite``
+
+    Sortable columns for ``order_column``:
+        ``id``, ``dashboard_title``, ``slug``, ``published``,
+        ``changed_on``, ``created_on``
     """
     request = request or _DEFAULT_LIST_DASHBOARDS_REQUEST.model_copy(deep=True)
     await ctx.info(

diff --git a/superset/mcp_service/dataset/schemas.py b/superset/mcp_service/dataset/schemas.py
@@ -71,8 +71,11 @@ class DatasetFilter(ColumnOperator):
         "database_name",
     ] = Field(
         ...,
-        description="Column to filter on. Use get_schema(model_type='dataset') for "
-        "available filter columns.",
+        description=(
+            "Column to filter on. Valid values: 'table_name', 'schema', "
+            "'database_name'. Other column names (e.g. 'created_by_fk', 'id') "
+            "are not valid filter columns and will cause a validation error."
+        ),
     )
     opr: ColumnOperatorEnum = Field(
         ...,

diff --git a/superset/mcp_service/dataset/tool/list_datasets.py b/superset/mcp_service/dataset/tool/list_datasets.py
@@ -96,8 +96,23 @@ async def list_datasets(
     Returns dataset metadata including table name, schema, and last modified
     time.
 
-    Sortable columns for order_column: id, table_name, schema, changed_on,
-    created_on
+    **IMPORTANT**: All parameters must be wrapped in a ``request`` object.
+    Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
+    keyword arguments — they will be rejected. Use the ``request`` wrapper::
+
+        # Correct usage
+        list_datasets(request={"search": "sales", "page": 1, "page_size": 10})
+        list_datasets(request={"filters": [{"col": "table_name", "opr": "sw", "value": "orders"}]})
+        list_datasets()  # no arguments returns first page with defaults
+
+        # Wrong — causes pydantic validation errors
+        list_datasets(search="sales", page=1)  # DO NOT DO THIS
+
+    Valid filter columns for ``filters[].col``:
+        ``table_name``, ``schema``, ``database_name``
+
+    Sortable columns for ``order_column``:
+        ``id``, ``table_name``, ``schema``, ``changed_on``, ``created_on``
     """
     if ctx is None:
         raise RuntimeError("FastMCP context is required for list_datasets")

diff --git a/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py b/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py
@@ -1349,7 +1349,8 @@ def test_sortable_columns_in_docstring(self):
 
         # Check list_dashboards docstring for sortable columns documentation
         assert list_dashboards.__doc__ is not None
-        assert "Sortable columns for order_column:" in list_dashboards.__doc__
+        assert "Sortable columns for" in list_dashboards.__doc__
+        assert "order_column" in list_dashboards.__doc__
         for col in SORTABLE_DASHBOARD_COLUMNS:
             assert col in list_dashboards.__doc__
 

diff --git a/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py b/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
@@ -1700,7 +1700,8 @@ def test_sortable_columns_in_docstring(self):
 
         # Check list_datasets docstring for sortable columns documentation
         assert list_datasets.__doc__ is not None
-        assert "Sortable columns for order_column:" in list_datasets.__doc__
+        assert "Sortable columns for" in list_datasets.__doc__
+        assert "order_column" in list_datasets.__doc__
         for col in SORTABLE_DATASET_COLUMNS:
             assert col in list_datasets.__doc__
 
@@ -2080,3 +2081,90 @@ def test_owned_by_me_and_created_by_me_allowed(self):
         request = ListDatasetsRequest(owned_by_me=True, created_by_me=True)
         assert request.owned_by_me is True
         assert request.created_by_me is True
+
+
+class TestListDatasetsRequestWrapper:
+    """
+    Tests verifying that list_datasets requires a ``request`` wrapper object.
+
+    LLMs sometimes pass parameters like ``search``, ``page``, or ``page_size``
+    as flat top-level kwargs instead of nesting them inside a ``request``
+    object.  These tests confirm the correct call shape through both the Pydantic
+    schema and the actual MCP tool layer, and verify that invalid filter column
+    names (e.g. ``created_by_fk``) are rejected.
+    """
+
+    def test_request_wrapper_with_search(self):
+        """Parameters passed inside request= are accepted by the schema."""
+        request = ListDatasetsRequest(search="sales", page=1, page_size=10)
+        assert request.search == "sales"
+        assert request.page == 1
+        assert request.page_size == 10
+
+    def test_request_wrapper_defaults(self):
+        """No-arg constructor produces valid schema defaults."""
+        request = ListDatasetsRequest()
+        assert request.search is None
+        assert request.page == 1
+        assert request.filters == []
+
+    def test_dataset_filter_valid_col(self):
+        """Valid col values are accepted by DatasetFilter."""
+        for col in ("table_name", "schema", "database_name"):
+            f = DatasetFilter(col=col, opr="sw", value="test")
+            assert f.col == col
+
+    def test_dataset_filter_invalid_col_raises(self):
+        """Column names not in the Literal are rejected with a validation error.
+
+        This guards against LLMs passing ``created_by_fk`` or similar
+        internal column names that are not exposed as filter fields.
+        """
+        from pydantic import ValidationError
+
+        for bad_col in ("created_by_fk", "id", "database_id", "owner"):
+            with pytest.raises(ValidationError):
+                DatasetFilter(col=bad_col, opr="eq", value="1")
+
+    @patch("superset.daos.dataset.DatasetDAO.list")
+    @pytest.mark.asyncio
+    async def test_request_wrapper_enforced_by_tool(self, mock_list, mcp_server):
+        """The MCP tool layer accepts the request wrapper and returns results.
+
+        Verifies end-to-end that wrapping params in ``request={}`` works through
+        the actual FastMCP tool call, not just schema validation.
+        """
+        mock_list.return_value = ([], 0)
+        async with Client(mcp_server) as client:
+            result = await client.call_tool(
+                "list_datasets",
+                {"request": {"search": "sales", "page": 1, "page_size": 5}},
+            )
+        data = json.loads(result.content[0].text)
+        assert data["count"] == 0
+        assert data["datasets"] == []
+
+    @pytest.mark.asyncio
+    async def test_flat_kwargs_rejected(self, mcp_server):
+        """Passing search/page/page_size as top-level kwargs raises a ToolError
+        that specifically mentions the unexpected arguments.
+
+        This is the exact failure pattern from story #105712: LLMs call
+        ``list_datasets(search=..., page=..., page_size=...)`` instead of
+        ``list_datasets(request={...})``.
+        """
+        from fastmcp.exceptions import ToolError
+
+        with pytest.raises(ToolError) as exc_info:
+            async with Client(mcp_server) as client:
+                await client.call_tool(
+                    "list_datasets",
+                    {"search": "sales", "page": 1, "page_size": 10},
+                )
+        error_text = str(exc_info.value)
+        # The error must call out the unexpected arguments, not some unrelated failure
+        assert (
+            "search" in error_text
+            or "Unexpected" in error_text
+            or "request" in error_text
+        )