Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.lint.per-file-ignores]
"superset/mcp_service/app.py" = ["S608", "E501"] # LLM instruction text: SQL examples (S608) and long lines in multiline string (E501)
"superset/mcp_service/*/tool/list_*.py" = ["E501"] # LLM docstring examples show full request shapes which exceed line length
"scripts/*" = ["TID251"]
"setup.py" = ["TID251"]
"superset/config.py" = ["TID251"]
Expand Down
7 changes: 5 additions & 2 deletions superset/mcp_service/chart/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,11 @@ class ChartFilter(ColumnOperator):
"datasource_name",
] = Field(
...,
description="Column to filter on. Use get_schema(model_type='chart') for "
"available filter columns.",
description=(
"Column to filter on. Valid values: 'slice_name', 'viz_type', "
"'datasource_name'. Other column names are not valid filter columns "
"and will cause a validation error."
),
)
opr: ColumnOperatorEnum = Field(
...,
Expand Down
20 changes: 18 additions & 2 deletions superset/mcp_service/chart/tool/list_charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,24 @@ async def list_charts(
Returns chart metadata including id, name, viz_type, URL, and last
modified time.

Sortable columns for order_column: id, slice_name, viz_type, description,
changed_on, created_on
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
keyword arguments — they will be rejected. Use the ``request`` wrapper::

# Correct usage
list_charts(request={"search": "revenue", "page": 1, "page_size": 10})
list_charts(request={"filters": [{"col": "slice_name", "opr": "sw", "value": "sales"}]})
list_charts() # no arguments returns first page with defaults

# Wrong — causes pydantic validation errors
list_charts(search="revenue", page=1) # DO NOT DO THIS

Valid filter columns for ``filters[].col``:
``slice_name``, ``viz_type``, ``datasource_name``

Sortable columns for ``order_column``:
``id``, ``slice_name``, ``viz_type``, ``description``,
``changed_on``, ``created_on``
"""
request = request or _DEFAULT_LIST_CHARTS_REQUEST.model_copy(deep=True)
await ctx.info(
Expand Down
5 changes: 3 additions & 2 deletions superset/mcp_service/dashboard/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,9 @@ class DashboardFilter(ColumnOperator):
] = Field(
...,
description=(
"Column to filter on. Use "
"get_schema(model_type='dashboard') for available filter columns."
"Column to filter on. Valid values: 'dashboard_title', 'published', "
"'favorite'. Other column names are not valid filter columns and will "
"cause a validation error."
),
)
opr: ColumnOperatorEnum = Field(
Expand Down
20 changes: 18 additions & 2 deletions superset/mcp_service/dashboard/tool/list_dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,24 @@ async def list_dashboards(
including title, slug, URL, and last modified time. Use select_columns to
request additional fields.

Sortable columns for order_column: id, dashboard_title, slug, published,
changed_on, created_on
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
keyword arguments — they will be rejected. Use the ``request`` wrapper::

# Correct usage
list_dashboards(request={"search": "sales", "page": 1, "page_size": 10})
list_dashboards(request={"filters": [{"col": "dashboard_title", "opr": "sw", "value": "exec"}]})
list_dashboards() # no arguments returns first page with defaults

# Wrong — causes pydantic validation errors
list_dashboards(search="sales", page=1) # DO NOT DO THIS

Valid filter columns for ``filters[].col``:
``dashboard_title``, ``published``, ``favorite``

Sortable columns for ``order_column``:
``id``, ``dashboard_title``, ``slug``, ``published``,
``changed_on``, ``created_on``
"""
request = request or _DEFAULT_LIST_DASHBOARDS_REQUEST.model_copy(deep=True)
await ctx.info(
Expand Down
7 changes: 5 additions & 2 deletions superset/mcp_service/dataset/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,11 @@ class DatasetFilter(ColumnOperator):
"database_name",
] = Field(
...,
description="Column to filter on. Use get_schema(model_type='dataset') for "
"available filter columns.",
description=(
"Column to filter on. Valid values: 'table_name', 'schema', "
"'database_name'. Other column names (e.g. 'created_by_fk', 'id') "
"are not valid filter columns and will cause a validation error."
),
)
opr: ColumnOperatorEnum = Field(
...,
Expand Down
19 changes: 17 additions & 2 deletions superset/mcp_service/dataset/tool/list_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,23 @@ async def list_datasets(
Returns dataset metadata including table name, schema, and last modified
time.

Sortable columns for order_column: id, table_name, schema, changed_on,
created_on
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
keyword arguments — they will be rejected. Use the ``request`` wrapper::

# Correct usage
list_datasets(request={"search": "sales", "page": 1, "page_size": 10})
list_datasets(request={"filters": [{"col": "table_name", "opr": "sw", "value": "orders"}]})
list_datasets() # no arguments returns first page with defaults

# Wrong — causes pydantic validation errors
list_datasets(search="sales", page=1) # DO NOT DO THIS

Valid filter columns for ``filters[].col``:
``table_name``, ``schema``, ``database_name``

Sortable columns for ``order_column``:
``id``, ``table_name``, ``schema``, ``changed_on``, ``created_on``
"""
if ctx is None:
raise RuntimeError("FastMCP context is required for list_datasets")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,8 @@ def test_sortable_columns_in_docstring(self):

# Check list_dashboards docstring for sortable columns documentation
assert list_dashboards.__doc__ is not None
assert "Sortable columns for order_column:" in list_dashboards.__doc__
assert "Sortable columns for" in list_dashboards.__doc__
assert "order_column" in list_dashboards.__doc__
for col in SORTABLE_DASHBOARD_COLUMNS:
assert col in list_dashboards.__doc__

Expand Down
90 changes: 89 additions & 1 deletion tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1700,7 +1700,8 @@ def test_sortable_columns_in_docstring(self):

# Check list_datasets docstring for sortable columns documentation
assert list_datasets.__doc__ is not None
assert "Sortable columns for order_column:" in list_datasets.__doc__
assert "Sortable columns for" in list_datasets.__doc__
assert "order_column" in list_datasets.__doc__
for col in SORTABLE_DATASET_COLUMNS:
assert col in list_datasets.__doc__

Expand Down Expand Up @@ -2080,3 +2081,90 @@ def test_owned_by_me_and_created_by_me_allowed(self):
request = ListDatasetsRequest(owned_by_me=True, created_by_me=True)
assert request.owned_by_me is True
assert request.created_by_me is True


class TestListDatasetsRequestWrapper:
"""
Tests verifying that list_datasets requires a ``request`` wrapper object.

LLMs sometimes pass parameters like ``search``, ``page``, or ``page_size``
as flat top-level kwargs instead of nesting them inside a ``request``
object. These tests confirm the correct call shape through both the Pydantic
schema and the actual MCP tool layer, and verify that invalid filter column
names (e.g. ``created_by_fk``) are rejected.
"""

def test_request_wrapper_with_search(self):
"""Parameters passed inside request= are accepted by the schema."""
request = ListDatasetsRequest(search="sales", page=1, page_size=10)
assert request.search == "sales"
assert request.page == 1
assert request.page_size == 10

def test_request_wrapper_defaults(self):
"""No-arg constructor produces valid schema defaults."""
request = ListDatasetsRequest()
assert request.search is None
assert request.page == 1
assert request.filters == []

Comment thread
aminghadersohi marked this conversation as resolved.
def test_dataset_filter_valid_col(self):
"""Valid col values are accepted by DatasetFilter."""
for col in ("table_name", "schema", "database_name"):
f = DatasetFilter(col=col, opr="sw", value="test")
assert f.col == col

def test_dataset_filter_invalid_col_raises(self):
"""Column names not in the Literal are rejected with a validation error.

This guards against LLMs passing ``created_by_fk`` or similar
internal column names that are not exposed as filter fields.
"""
from pydantic import ValidationError

for bad_col in ("created_by_fk", "id", "database_id", "owner"):
with pytest.raises(ValidationError):
DatasetFilter(col=bad_col, opr="eq", value="1")

@patch("superset.daos.dataset.DatasetDAO.list")
@pytest.mark.asyncio
async def test_request_wrapper_enforced_by_tool(self, mock_list, mcp_server):
"""The MCP tool layer accepts the request wrapper and returns results.

Verifies end-to-end that wrapping params in ``request={}`` works through
the actual FastMCP tool call, not just schema validation.
"""
mock_list.return_value = ([], 0)
async with Client(mcp_server) as client:
result = await client.call_tool(
"list_datasets",
{"request": {"search": "sales", "page": 1, "page_size": 5}},
)
data = json.loads(result.content[0].text)
assert data["count"] == 0
assert data["datasets"] == []

@pytest.mark.asyncio
async def test_flat_kwargs_rejected(self, mcp_server):
"""Passing search/page/page_size as top-level kwargs raises a ToolError
that specifically mentions the unexpected arguments.

This is the exact failure pattern from story #105712: LLMs call
``list_datasets(search=..., page=..., page_size=...)`` instead of
``list_datasets(request={...})``.
"""
from fastmcp.exceptions import ToolError

with pytest.raises(ToolError) as exc_info:
async with Client(mcp_server) as client:
await client.call_tool(
"list_datasets",
{"search": "sales", "page": 1, "page_size": 10},
)
Comment thread
aminghadersohi marked this conversation as resolved.
error_text = str(exc_info.value)
# The error must call out the unexpected arguments, not some unrelated failure
assert (
"search" in error_text
or "Unexpected" in error_text
or "request" in error_text
)
Loading