Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.lint.per-file-ignores]
"superset/mcp_service/app.py" = ["S608", "E501"] # LLM instruction text: SQL examples (S608) and long lines in multiline string (E501)
"superset/mcp_service/*/tool/list_*.py" = ["E501"] # LLM docstring examples show full request shapes which exceed line length
"scripts/*" = ["TID251"]
"setup.py" = ["TID251"]
"superset/config.py" = ["TID251"]
Expand Down
7 changes: 5 additions & 2 deletions superset/mcp_service/chart/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,11 @@ class ChartFilter(ColumnOperator):
"datasource_name",
] = Field(
...,
description="Column to filter on. Use get_schema(model_type='chart') for "
"available filter columns.",
description=(
"Column to filter on. Valid values: 'slice_name', 'viz_type', "
"'datasource_name'. Other column names are not valid filter columns "
"and will cause a validation error."
),
)
opr: ColumnOperatorEnum = Field(
...,
Expand Down
20 changes: 18 additions & 2 deletions superset/mcp_service/chart/tool/list_charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,24 @@ async def list_charts(
Returns chart metadata including id, name, viz_type, URL, and last
modified time.

Sortable columns for order_column: id, slice_name, viz_type, description,
changed_on, created_on
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
keyword arguments — they will be rejected. Use the ``request`` wrapper::

# Correct usage
list_charts(request={"search": "revenue", "page": 1, "page_size": 10})
list_charts(request={"filters": [{"col": "slice_name", "opr": "ct", "value": "sales"}]})
Comment thread
aminghadersohi marked this conversation as resolved.
Outdated
list_charts() # no arguments returns first page with defaults

# Wrong — causes pydantic validation errors
list_charts(search="revenue", page=1) # DO NOT DO THIS

Valid filter columns for ``filters[].col``:
``slice_name``, ``viz_type``, ``datasource_name``

Sortable columns for ``order_column``:
``id``, ``slice_name``, ``viz_type``, ``description``,
``changed_on``, ``created_on``
"""
request = request or _DEFAULT_LIST_CHARTS_REQUEST.model_copy(deep=True)
await ctx.info(
Expand Down
5 changes: 3 additions & 2 deletions superset/mcp_service/dashboard/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,9 @@ class DashboardFilter(ColumnOperator):
] = Field(
...,
description=(
"Column to filter on. Use "
"get_schema(model_type='dashboard') for available filter columns."
"Column to filter on. Valid values: 'dashboard_title', 'published', "
"'favorite'. Other column names are not valid filter columns and will "
"cause a validation error."
),
)
opr: ColumnOperatorEnum = Field(
Expand Down
20 changes: 18 additions & 2 deletions superset/mcp_service/dashboard/tool/list_dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,24 @@ async def list_dashboards(
including title, slug, URL, and last modified time. Use select_columns to
request additional fields.

Sortable columns for order_column: id, dashboard_title, slug, published,
changed_on, created_on
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
keyword arguments — they will be rejected. Use the ``request`` wrapper::

# Correct usage
list_dashboards(request={"search": "sales", "page": 1, "page_size": 10})
list_dashboards(request={"filters": [{"col": "dashboard_title", "opr": "ct", "value": "exec"}]})
Comment thread
aminghadersohi marked this conversation as resolved.
Outdated
list_dashboards() # no arguments returns first page with defaults

# Wrong — causes pydantic validation errors
list_dashboards(search="sales", page=1) # DO NOT DO THIS

Valid filter columns for ``filters[].col``:
``dashboard_title``, ``published``, ``favorite``

Sortable columns for ``order_column``:
``id``, ``dashboard_title``, ``slug``, ``published``,
``changed_on``, ``created_on``
"""
request = request or _DEFAULT_LIST_DASHBOARDS_REQUEST.model_copy(deep=True)
await ctx.info(
Expand Down
7 changes: 5 additions & 2 deletions superset/mcp_service/dataset/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,11 @@ class DatasetFilter(ColumnOperator):
"database_name",
] = Field(
...,
description="Column to filter on. Use get_schema(model_type='dataset') for "
"available filter columns.",
description=(
"Column to filter on. Valid values: 'table_name', 'schema', "
"'database_name'. Other column names (e.g. 'created_by_fk', 'id') "
"are not valid filter columns and will cause a validation error."
),
)
opr: ColumnOperatorEnum = Field(
...,
Expand Down
19 changes: 17 additions & 2 deletions superset/mcp_service/dataset/tool/list_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,23 @@ async def list_datasets(
Returns dataset metadata including table name, schema, and last modified
time.

Sortable columns for order_column: id, table_name, schema, changed_on,
created_on
**IMPORTANT**: All parameters must be wrapped in a ``request`` object.
Do NOT pass ``search``, ``page``, ``page_size``, etc. as top-level
keyword arguments — they will be rejected. Use the ``request`` wrapper::

# Correct usage
list_datasets(request={"search": "sales", "page": 1, "page_size": 10})
list_datasets(request={"filters": [{"col": "table_name", "opr": "ct", "value": "orders"}]})
Comment thread
aminghadersohi marked this conversation as resolved.
Outdated
list_datasets() # no arguments returns first page with defaults

# Wrong — causes pydantic validation errors
list_datasets(search="sales", page=1) # DO NOT DO THIS

Valid filter columns for ``filters[].col``:
``table_name``, ``schema``, ``database_name``

Sortable columns for ``order_column``:
``id``, ``table_name``, ``schema``, ``changed_on``, ``created_on``
"""
if ctx is None:
raise RuntimeError("FastMCP context is required for list_datasets")
Expand Down
60 changes: 60 additions & 0 deletions tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2080,3 +2080,63 @@ def test_owned_by_me_and_created_by_me_allowed(self):
request = ListDatasetsRequest(owned_by_me=True, created_by_me=True)
assert request.owned_by_me is True
assert request.created_by_me is True


class TestListDatasetsRequestWrapper:
"""
Tests verifying that list_datasets requires a ``request`` wrapper object.

LLMs sometimes pass parameters like ``search``, ``page``, or ``page_size``
as flat top-level kwargs instead of nesting them inside a ``request``
object. These tests confirm the correct call shape and that the schema
rejects invalid filter column names (e.g. ``created_by_fk``).
"""

def test_request_wrapper_with_search(self):
"""Parameters passed inside request= are accepted."""
request = ListDatasetsRequest(search="sales", page=1, page_size=10)
assert request.search == "sales"
assert request.page == 1
assert request.page_size == 10

def test_request_wrapper_defaults(self):
"""No-arg constructor produces valid defaults."""
request = ListDatasetsRequest()
assert request.search is None
assert request.page == 1
assert request.filters == []

Comment thread
aminghadersohi marked this conversation as resolved.
def test_dataset_filter_valid_col(self):
"""Valid col values are accepted by DatasetFilter."""
for col in ("table_name", "schema", "database_name"):
f = DatasetFilter(col=col, opr="ct", value="test")
assert f.col == col

def test_dataset_filter_invalid_col_raises(self):
"""Column names not in the Literal are rejected with a validation error.

This guards against LLMs passing ``created_by_fk`` or similar
internal column names that are not exposed as filter fields.
"""
from pydantic import ValidationError

for bad_col in ("created_by_fk", "id", "database_id", "owner"):
with pytest.raises(ValidationError):
DatasetFilter(col=bad_col, opr="eq", value="1")

@pytest.mark.asyncio
async def test_flat_kwargs_rejected(self, mcp_server):
"""Passing search/page/page_size as top-level kwargs raises a ToolError.

This is the exact failure pattern reported in story #105712: LLMs call
``list_datasets(search=..., page=..., page_size=...)`` instead of
``list_datasets(request={...})``.
"""
from fastmcp.exceptions import ToolError

with pytest.raises(ToolError):
async with Client(mcp_server) as client:
await client.call_tool(
"list_datasets",
{"search": "sales", "page": 1, "page_size": 10},
)
Comment thread
aminghadersohi marked this conversation as resolved.
Loading