From b2ac06d2f1b98b2b55552a14acf4a27cb38ccd76 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Wed, 3 Jun 2026 18:04:11 +0300 Subject: [PATCH 1/5] fix(validators): adopt boundary-anchored mcp-name match in PyPI and NuGet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stacked on the cargo follow-up (introduces containsMCPNameToken). This extends the boundary-anchored ownership-token match to the PyPI and NuGet validators, replacing their bare strings.Contains checks so a README declaring a longer name (e.g. io.github.acme/widget-pro) no longer satisfies a claim for a shorter prefix (io.github.acme/widget). ⚠️ BEHAVIOR CHANGE for PyPI/NuGet (not just additive): The new match is strictly stricter — it can only flip a previously-passing publish to failing, never the reverse. The realistic case that flips is a README whose ONLY occurrence of the token is immediately followed by a server-name character [A-Za-z0-9._/-], e.g. a trailing period in prose ("...published as mcp-name: io.github.acme/widget."). The token on its own line, in backticks, or followed by whitespace/newline/HTML-tag is unaffected. Re-validation runs only at publish time (CreateServer); edits/status updates do not re-check ownership and there is no background re-validation, so already-stored servers are not affected — but an existing PyPI/NuGet publisher pushing a NEW VERSION with the token in the glued form would fail where it previously passed. Given the v0.1 API freeze, this should land deliberately and not be promoted to prod without sign-off. Live positive tests (time-mcp-pypi, TimeMcpServer) still pass. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/validators/registries/nuget.go | 8 ++++---- internal/validators/registries/pypi.go | 9 ++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/internal/validators/registries/nuget.go b/internal/validators/registries/nuget.go index 0c6d4c51..f8f97d7a 100644 --- a/internal/validators/registries/nuget.go +++ b/internal/validators/registries/nuget.go @@ -237,10 +237,10 @@ func validateReadme(ctx context.Context, serverName, lowerID, lowerVersion strin readmeContent := string(readmeBytes) - // Check for mcp-name: format (more specific) - mcpNamePattern := "mcp-name: " + serverName - if strings.Contains(readmeContent, mcpNamePattern) { - return ValidReadme, nil // Found as mcp-name: format + // Check for the mcp-name: ownership token (boundary-anchored + // to avoid prefix confusion — see containsMCPNameToken). + if containsMCPNameToken(readmeContent, serverName) { + return ValidReadme, nil } return InvalidReadme, nil diff --git a/internal/validators/registries/pypi.go b/internal/validators/registries/pypi.go index 2bac843b..04068a3e 100644 --- a/internal/validators/registries/pypi.go +++ b/internal/validators/registries/pypi.go @@ -7,7 +7,6 @@ import ( "fmt" "net/http" "net/url" - "strings" "time" "github.com/modelcontextprotocol/registry/pkg/model" @@ -85,10 +84,10 @@ func ValidatePyPI(ctx context.Context, pkg model.Package, serverName string) err // Check description (README) content description := pypiResp.Info.Description - // Check for mcp-name: format (more specific) - mcpNamePattern := "mcp-name: " + serverName - if strings.Contains(description, mcpNamePattern) { - return nil // Found as mcp-name: format + // Check for the mcp-name: ownership token (boundary-anchored to + // avoid prefix confusion — see containsMCPNameToken). + if containsMCPNameToken(description, serverName) { + return nil } return fmt.Errorf("PyPI package '%s' ownership validation failed. 
The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName) From d6be80ce5f2ab543b337f016c68a27283fa8c6f3 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Fri, 5 Jun 2026 01:44:49 +0300 Subject: [PATCH 2/5] fix(validators): treat HTML comment close as an mcp-name boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The boundary-anchored matcher rejected the documented hidden-comment form when the trailing space was omitted: `<!-- mcp-name: NAME-->` / `<!--mcp-name: NAME-->` fail because the byte after NAME is `-` (a server-name char). Since PyPI/NuGet publishers commonly hide the token in an HTML comment, this would break the recommended form on the next publish or edit. Add isMCPNameBoundary, which treats the HTML comment close (`-->` / `--!>`) immediately after the name as a boundary, so all spacing variants of the comment form validate while a genuine longer name (e.g. `…/widget--pro`) still does not. Tests: comment-form cases (spaced/unspaced/legacy `--!>`) and a double-hyphen longer-name negative; plus FuzzContainsMCPNameToken pinning the safety property that the matcher is strictly stricter than strings.Contains (can only flip pass→fail, never fail→pass) — verified over 2.3M executions. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/validators/registries/mcpname.go | 31 ++++++++++--- .../registries/mcpname_internal_test.go | 43 ++++++++++++++++++- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/internal/validators/registries/mcpname.go b/internal/validators/registries/mcpname.go index a757db3b..4f04b0a2 100644 --- a/internal/validators/registries/mcpname.go +++ b/internal/validators/registries/mcpname.go @@ -18,6 +18,28 @@ func isServerNameChar(c byte) bool { } } +// isMCPNameBoundary reports whether the content immediately following a matched +// server name terminates the token, so that the matched name is not merely a +// prefix of a longer declared name. 
+// +// A boundary is the end of content, any non-server-name character (whitespace, a +// newline, or an HTML tag delimiter such as `<`), or the start of an HTML comment +// close (`-->` / `--!>`). The comment-close case matters because PyPI and NuGet +// publishers commonly hide the token in `<!-- mcp-name: NAME -->`, and authors +// (or minifiers) frequently omit the space before `-->`, producing +// `<!-- mcp-name: NAME-->`. There the byte after NAME is `-`, which is a +// server-name character, so without this case the documented hidden-comment form +// would fail to validate. +func isMCPNameBoundary(rest string) bool { + if rest == "" { + return true + } + if !isServerNameChar(rest[0]) { + return true + } + return strings.HasPrefix(rest, "-->") || strings.HasPrefix(rest, "--!>") +} + // containsMCPNameToken reports whether the package README/description contains // the ownership token "mcp-name: <serverName>" as a complete token — i.e. the // matched server name is not merely a prefix of a longer declared name. @@ -27,10 +49,9 @@ func isServerNameChar(c byte) bool { // satisfy an ownership claim for the shorter `io.github.acme/widget`, because the // shorter string is a substring of the longer one. This is contained by namespace // authorization (a publisher can only claim names within a namespace they own), -// but it still weakens the crate↔server-name binding the token is meant to prove, -// so we require a trailing boundary: the character following the server name must -// be the end of the content or any non-server-name character (whitespace, a -// newline, or an HTML tag delimiter from a rendered README such as `<`). +// but it still weakens the package↔server-name binding the token is meant to +// prove, so we require a trailing boundary after the server name (see +// isMCPNameBoundary). // // Shared by the README-token validators (PyPI, NuGet, Cargo). NPM is unaffected // because it compares an exact metadata field rather than scanning README text. 
@@ -43,7 +64,7 @@ func containsMCPNameToken(content, serverName string) bool { return false } tokenEnd := searchFrom + idx + len(token) - if tokenEnd >= len(content) || !isServerNameChar(content[tokenEnd]) { + if isMCPNameBoundary(content[tokenEnd:]) { return true } // This occurrence is a prefix of a longer name; keep scanning in case a diff --git a/internal/validators/registries/mcpname_internal_test.go b/internal/validators/registries/mcpname_internal_test.go index 0d693523..bbbf3482 100644 --- a/internal/validators/registries/mcpname_internal_test.go +++ b/internal/validators/registries/mcpname_internal_test.go @@ -1,6 +1,9 @@ package registries -import "testing" +import ( + "strings" + "testing" +) // TestContainsMCPNameToken covers the boundary-anchored ownership-token match // shared by the PyPI, NuGet, and Cargo validators — in particular that a server @@ -23,6 +26,17 @@ func TestContainsMCPNameToken(t *testing.T) { {"absent", "nothing to see here", false}, {"different name", "mcp-name: io.github.other/thing\n", false}, {"prefix occurrence before a real one still matches", "mcp-name: io.github.acme/widget-pro then mcp-name: io.github.acme/widget\n", true}, + + // HTML hidden-comment form (documented for PyPI/NuGet). The canonical + // spaced form has always passed; the no-space variants must pass too, + // since the byte after the name is the `-` of the comment close. + {"comment, spaced (canonical)", "", true}, + {"comment, no trailing space", "", true}, + {"comment, no spaces at all", "", true}, + {"legacy comment close --!>", "", true}, + // A genuine longer name with a double hyphen is still NOT a match for the + // shorter claim (it is not an HTML comment close). 
+ {"double-hyphen longer name not a match", "mcp-name: io.github.acme/widget--pro\n", false}, } for _, tc := range cases { @@ -33,3 +47,30 @@ func TestContainsMCPNameToken(t *testing.T) { }) } } + +// FuzzContainsMCPNameToken pins the core safety property of the boundary-anchored +// matcher: it is strictly stricter than a bare substring check. A true result +// must imply the literal token is present (strings.Contains). This guards against +// a future edit to isServerNameChar/isMCPNameBoundary accidentally accepting +// something the old behavior rejected — i.e. it can only ever flip pass→fail, +// never fail→pass. Runs the seed corpus under `go test`; exhaustively under +// `go test -fuzz`. +func FuzzContainsMCPNameToken(f *testing.F) { + seeds := []struct{ content, name string }{ + {"mcp-name: io.github.acme/widget", "io.github.acme/widget"}, + {"mcp-name: io.github.acme/widget-pro", "io.github.acme/widget"}, + {"", "io.github.acme/widget"}, + {"prefix mcp-name: a/b then mcp-name: a/b-c", "a/b"}, + {"", ""}, + {"mcp-name: ", ""}, + {"random text with no token", "io.github.x/y"}, + } + for _, s := range seeds { + f.Add(s.content, s.name) + } + f.Fuzz(func(t *testing.T, content, name string) { + if containsMCPNameToken(content, name) && !strings.Contains(content, "mcp-name: "+name) { + t.Fatalf("matcher accepted but literal token absent: content=%q name=%q", content, name) + } + }) +} From 70b85b8f2a2e3cd6d728ac480589e8d26885e563 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Fri, 5 Jun 2026 01:44:49 +0300 Subject: [PATCH 3/5] fix(cargo): pin scheme/port, classify transient existence-probe, soften 403 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-up hardening fixes to the cargo validator (review of #1330): - SSRF: the README host pin and the redirect policy keyed on Hostname() only, so any port/scheme on crates.io/static.crates.io was accepted. 
cargoURLAllowed now additionally requires https + the default port for the real crates.io base (test/mock bases still match on host only, so httptest fixtures keep working). - Transient 403 disambiguation: when the README CDN 403s, the crate-version existence probe previously reported "not found" if the probe itself failed (429/5xx/network) — the same misclassification the 5xx handling fixed one layer up. probeCargoVersion now returns a four-state result and a transient probe yields a retryable message instead of "not found". - A 403 with the crate present no longer flatly asserts "no rendered README" (a 403 isn't definitive proof — could be a CDN/WAF block); the message now says the README could not be retrieved and gives the actionable next step. Tests: TestCargoURLAllowed (https/port/userinfo/foreign-host matrix) and a combined-fixture case where the 403 existence-probe is rate-limited (429) and must report transient, not "not found". Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/validators/registries/cargo.go | 114 ++++++++++++------ .../registries/cargo_internal_test.go | 47 ++++++++ internal/validators/registries/cargo_test.go | 28 ++++- 3 files changed, 149 insertions(+), 40 deletions(-) create mode 100644 internal/validators/registries/cargo_internal_test.go diff --git a/internal/validators/registries/cargo.go b/internal/validators/registries/cargo.go index d90af778..10e4ba06 100644 --- a/internal/validators/registries/cargo.go +++ b/internal/validators/registries/cargo.go @@ -106,16 +106,37 @@ func cargoAllowedHosts(baseURL string) map[string]struct{} { return hosts } +// cargoURLAllowed reports whether u is a URL the validator may fetch for the +// given base. The host must be in allowedHosts; additionally, for the real +// crates.io base, the scheme must be https and the port must be the default — +// so a metadata response or redirect cannot downgrade to http or steer the +// validator at a non-standard port on an otherwise-trusted host. 
For test bases +// (httptest servers) only the host is checked, so mocks keep working. +func cargoURLAllowed(u *url.URL, baseURL string, allowedHosts map[string]struct{}) bool { + if _, ok := allowedHosts[u.Hostname()]; !ok { + return false + } + if baseURL == model.RegistryURLCrates { + if u.Scheme != "https" { + return false + } + if p := u.Port(); p != "" && p != "443" { + return false + } + } + return true +} + // newCargoHTTPClient builds the client used for all crates.io calls. The -// CheckRedirect policy pins every redirect hop to allowedHosts, so even though -// the initial URL is host-pinned, an upstream 3xx cannot redirect the validator -// to an unexpected host. -func newCargoHTTPClient(allowedHosts map[string]struct{}) *http.Client { +// CheckRedirect policy pins every redirect hop via cargoURLAllowed, so even +// though the initial URL is checked, an upstream 3xx cannot redirect the +// validator to an unexpected host, scheme, or port. +func newCargoHTTPClient(baseURL string, allowedHosts map[string]struct{}) *http.Client { return &http.Client{ Timeout: 10 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error { - if _, ok := allowedHosts[req.URL.Hostname()]; !ok { - return fmt.Errorf("refusing redirect to unexpected host %q", req.URL.Hostname()) + if !cargoURLAllowed(req.URL, baseURL, allowedHosts) { + return fmt.Errorf("refusing redirect to unexpected URL %q", req.URL.Redacted()) } if len(via) >= 10 { return errors.New("stopped after 10 redirects") @@ -125,37 +146,53 @@ func newCargoHTTPClient(allowedHosts map[string]struct{}) *http.Client { } } -// cargoVersionExists checks whether a specific crate version exists on crates.io, -// used to disambiguate a 403 from the README CDN. static.crates.io (S3) returns -// 403 both for a genuinely-missing crate/version AND for a crate that exists but -// has no rendered README, so a 403 alone cannot tell a publisher which it is. 
-// -// Returns (exists, determined): determined is false if the existence endpoint -// itself was unreachable or returned an unexpected status, in which case the -// caller should fall back to a generic message rather than assert existence. -func cargoVersionExists(ctx context.Context, client *http.Client, baseURL, identifier, version string) (exists, determined bool) { +// cargoVersionState is the outcome of probing the crate-version metadata +// endpoint, used to disambiguate a 403 from the README CDN. +type cargoVersionState int + +const ( + // cargoVersionUnknown: the probe returned an unexpected status we can't classify. + cargoVersionUnknown cargoVersionState = iota + // cargoVersionExists: the crate version exists (200). + cargoVersionExists + // cargoVersionMissing: the crate version does not exist (404). + cargoVersionMissing + // cargoVersionTransient: the probe failed for a retryable reason (network + // error, 429, or 5xx) — existence is undetermined and the caller should not + // report "not found". + cargoVersionTransient +) + +// probeCargoVersion checks whether a specific crate version exists on crates.io. +// static.crates.io (S3) returns 403 both for a genuinely-missing crate/version +// AND for a crate that exists but has no rendered README, so a 403 from the CDN +// alone cannot tell a publisher which it is; this probe disambiguates, while +// distinguishing a transient failure from a definitive missing/exists answer. 
+func probeCargoVersion(ctx context.Context, client *http.Client, baseURL, identifier, version string) cargoVersionState { versionURL := fmt.Sprintf("%s/api/v1/crates/%s/%s", baseURL, url.PathEscape(identifier), url.PathEscape(version)) req, err := http.NewRequestWithContext(ctx, http.MethodGet, versionURL, nil) if err != nil { - return false, false + return cargoVersionUnknown } req.Header.Set("User-Agent", cargoUserAgent) req.Header.Set("Accept", "application/json") resp, err := client.Do(req) if err != nil { - return false, false + return cargoVersionTransient } defer resp.Body.Close() - switch resp.StatusCode { - case http.StatusOK: - return true, true - case http.StatusNotFound: - return false, true + switch { + case resp.StatusCode == http.StatusOK: + return cargoVersionExists + case resp.StatusCode == http.StatusNotFound: + return cargoVersionMissing + case resp.StatusCode == http.StatusTooManyRequests, resp.StatusCode >= 500 && resp.StatusCode < 600: + return cargoVersionTransient default: - return false, false + return cargoVersionUnknown } } @@ -196,15 +233,21 @@ func cargoReadmeStatusError(ctx context.Context, client *http.Client, pkg model. // endpoint so the publisher gets an actionable message rather than a blanket // "not found". func cargoReadme403Error(ctx context.Context, client *http.Client, pkg model.Package, serverName string) error { - exists, determined := cargoVersionExists(ctx, client, pkg.RegistryBaseURL, pkg.Identifier, pkg.Version) - switch { - case determined && exists: - return fmt.Errorf("cargo package '%s' version '%s' exists on crates.io but has no rendered README. Add a README containing 'mcp-name: %s' and publish a new version", pkg.Identifier, pkg.Version, serverName) - case determined && !exists: + switch probeCargoVersion(ctx, client, pkg.RegistryBaseURL, pkg.Identifier, pkg.Version) { + case cargoVersionExists: + // The crate/version exists but the README CDN returned 403. 
The likely + // cause is a missing README, but a 403 is not definitive proof (e.g. a + // transient CDN/WAF block), so don't flatly assert "no README". + return fmt.Errorf("cargo package '%s' version '%s' exists on crates.io, but its rendered README could not be retrieved (status: 403). If it has no README, add one containing 'mcp-name: %s' and publish a new version", pkg.Identifier, pkg.Version, serverName) + case cargoVersionMissing: return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io", pkg.Identifier, pkg.Version) - default: - return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io (status: 403)", pkg.Identifier, pkg.Version) + case cargoVersionTransient: + return fmt.Errorf("crates.io could not confirm cargo package '%s' version '%s' (README status: 403, version check inconclusive) — likely transient, retry later", pkg.Identifier, pkg.Version) + case cargoVersionUnknown: + // Probe returned an unclassifiable status — fall through to the + // best-effort message below. } + return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io (status: 403)", pkg.Identifier, pkg.Version) } // validateCargoREADME performs the two-call README fetch and the mcp-name token @@ -213,7 +256,7 @@ func cargoReadme403Error(ctx context.Context, client *http.Client, pkg model.Pac // bypassing the exact-baseURL guard that ValidateCargo enforces for callers. func validateCargoREADME(ctx context.Context, pkg model.Package, serverName string) error { allowedHosts := cargoAllowedHosts(pkg.RegistryBaseURL) - client := newCargoHTTPClient(allowedHosts) + client := newCargoHTTPClient(pkg.RegistryBaseURL, allowedHosts) // Step 1: fetch the README pointer from the documented API endpoint. 
metaURL := fmt.Sprintf("%s/api/v1/crates/%s/%s/readme", @@ -246,14 +289,15 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri return fmt.Errorf("cargo package '%s' metadata response missing 'url' field", pkg.Identifier) } - // Pin the README pointer to an allowed host before fetching it, so a metadata - // response cannot steer the validator at an internal or attacker-chosen host. + // Pin the README pointer to an allowed host/scheme/port before fetching it, so + // a metadata response cannot steer the validator at an internal or + // attacker-chosen URL. readmeParsed, err := url.Parse(meta.URL) if err != nil || readmeParsed.Hostname() == "" { return fmt.Errorf("cargo package '%s': crates.io returned an unparseable README URL", pkg.Identifier) } - if _, ok := allowedHosts[readmeParsed.Hostname()]; !ok { - return fmt.Errorf("cargo package '%s': crates.io returned a README URL on unexpected host %q — refusing to fetch", pkg.Identifier, readmeParsed.Hostname()) + if !cargoURLAllowed(readmeParsed, pkg.RegistryBaseURL, allowedHosts) { + return fmt.Errorf("cargo package '%s': crates.io returned a README URL on an unexpected host/scheme %q — refusing to fetch", pkg.Identifier, readmeParsed.Redacted()) } // Step 2: fetch the rendered README from the (now host-validated) URL. diff --git a/internal/validators/registries/cargo_internal_test.go b/internal/validators/registries/cargo_internal_test.go new file mode 100644 index 00000000..44273b53 --- /dev/null +++ b/internal/validators/registries/cargo_internal_test.go @@ -0,0 +1,47 @@ +package registries + +import ( + "net/url" + "testing" + + "github.com/modelcontextprotocol/registry/pkg/model" +) + +// TestCargoURLAllowed covers the SSRF allow-check: for the real crates.io base, +// the host must be allow-listed AND the scheme/port must be https/default; for a +// test (httptest) base only the host is checked so mocks keep working. 
+func TestCargoURLAllowed(t *testing.T) { + prodHosts := cargoAllowedHosts(model.RegistryURLCrates) // {crates.io, static.crates.io} + mockBase := "http://127.0.0.1:54321" + mockHosts := cargoAllowedHosts(mockBase) // {127.0.0.1} + + cases := []struct { + desc string + raw string + baseURL string + hosts map[string]struct{} + want bool + }{ + {"prod: https static.crates.io", "https://static.crates.io/readmes/x/x.html", model.RegistryURLCrates, prodHosts, true}, + {"prod: https crates.io", "https://crates.io/api/v1/crates/x/1.0.0", model.RegistryURLCrates, prodHosts, true}, + {"prod: http downgrade rejected", "http://static.crates.io/x", model.RegistryURLCrates, prodHosts, false}, + {"prod: non-default port rejected", "https://static.crates.io:8443/x", model.RegistryURLCrates, prodHosts, false}, + {"prod: explicit 443 ok", "https://static.crates.io:443/x", model.RegistryURLCrates, prodHosts, true}, + {"prod: foreign host rejected", "https://evil.example/x", model.RegistryURLCrates, prodHosts, false}, + {"prod: userinfo host is evil rejected", "https://static.crates.io@evil.example/x", model.RegistryURLCrates, prodHosts, false}, + {"test base: mock host any scheme/port ok", "http://127.0.0.1:54321/readme-static/x", mockBase, mockHosts, true}, + {"test base: foreign host rejected", "http://127.0.0.2:54321/x", mockBase, mockHosts, false}, + } + + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + u, err := url.Parse(tc.raw) + if err != nil { + t.Fatalf("parse %q: %v", tc.raw, err) + } + if got := cargoURLAllowed(u, tc.baseURL, tc.hosts); got != tc.want { + t.Fatalf("cargoURLAllowed(%q, base=%q) = %v, want %v", tc.raw, tc.baseURL, got, tc.want) + } + }) + } +} diff --git a/internal/validators/registries/cargo_test.go b/internal/validators/registries/cargo_test.go index d2c8128d..0006448a 100644 --- a/internal/validators/registries/cargo_test.go +++ b/internal/validators/registries/cargo_test.go @@ -340,6 +340,7 @@ func TestValidateCargoCombinedFixture(t 
*testing.T) { readmeStatus int readmeBody string versionExists bool // response for the /api/v1/crates/{n}/{v} existence probe (403 disambiguation) + versionProbe int // if non-zero, the existence probe returns this status (overrides versionExists) wantErr bool wantContains []string wantNotContains []string @@ -377,8 +378,9 @@ func TestValidateCargoCombinedFixture(t *testing.T) { wantNotContains: []string{"has no rendered README"}, }, { - // Crate/version exists but has no rendered README: CDN 403 + existence - // probe 200. Must NOT be reported as "not found". + // Crate/version exists but the README CDN 403s: existence probe 200. + // Must NOT be reported as "not found", and must not flatly assert the + // README is absent (a 403 isn't definitive proof of that). name: "readme_403_no_readme", crateName: "combined-readme403-noreadme", version: "0.1.0", @@ -386,7 +388,20 @@ func TestValidateCargoCombinedFixture(t *testing.T) { readmeStatus: http.StatusForbidden, versionExists: true, wantErr: true, - wantContains: []string{"has no rendered README"}, + wantContains: []string{"exists on crates.io", "could not be retrieved"}, + wantNotContains: []string{"not found"}, + }, + { + // CDN 403 + the existence probe itself is rate-limited (429): existence + // is undetermined, so report transient/retryable, NOT "not found". + name: "readme_403_probe_transient", + crateName: "combined-readme403-probe429", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusForbidden, + versionProbe: http.StatusTooManyRequests, + wantErr: true, + wantContains: []string{"transient"}, wantNotContains: []string{"not found"}, }, { @@ -450,10 +465,13 @@ func TestValidateCargoCombinedFixture(t *testing.T) { } // Existence probe used to disambiguate a README 403. 
if r.URL.Path == versionPath { - if tt.versionExists { + switch { + case tt.versionProbe != 0: + http.Error(w, "simulated probe status", tt.versionProbe) + case tt.versionExists: w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(map[string]any{"version": map[string]string{"num": tt.version}}) - } else { + default: http.Error(w, "not found", http.StatusNotFound) } return From 165c1bf8ae665801614a77ab07164ca089163085 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Fri, 5 Jun 2026 01:44:49 +0300 Subject: [PATCH 4/5] docs: clarify the mcp-name token must be followed by a boundary Note in the PyPI and NuGet ownership sections that the token must be followed by a newline, whitespace, an HTML tag, or the comment close `-->`, and must not be glued to trailing punctuation (e.g. a sentence-ending period). The matcher fix handles the comment-close case; this documents the remaining boundary rule. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/modelcontextprotocol-io/package-types.mdx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/modelcontextprotocol-io/package-types.mdx b/docs/modelcontextprotocol-io/package-types.mdx index 6cac890f..0367facc 100644 --- a/docs/modelcontextprotocol-io/package-types.mdx +++ b/docs/modelcontextprotocol-io/package-types.mdx @@ -87,6 +87,8 @@ This MCP server executes SQL queries and manages database connections. ``` +The `mcp-name:` token must be followed by a boundary — a newline, whitespace, an HTML tag, or the comment close `-->`. Keep it on its own line or inside `<!-- ... -->`; do not glue it directly to trailing characters such as a sentence-ending period (`…/database-query-mcp.`), which prevents the match. + ## NuGet Packages For NuGet packages, the MCP Registry currently supports the official NuGet registry (`https://api.nuget.org/v3/index.json`) only. @@ -125,6 +127,8 @@ This MCP server manages Azure DevOps work items and pipelines. 
``` +The `mcp-name:` token must be followed by a boundary — a newline, whitespace, an HTML tag, or the comment close `-->`. Keep it on its own line or inside `<!-- ... -->`; do not glue it directly to trailing characters such as a sentence-ending period (`…/azure-devops-mcp.`), which prevents the match. + ## Cargo (Rust) Packages For Cargo packages, the MCP Registry currently supports the official crates.io registry (`https://crates.io`) only. From 45987b764b68d92addc365b13adb3dfa0ac80396 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Fri, 5 Jun 2026 09:21:17 +0300 Subject: [PATCH 5/5] fix(validators): explain when an mcp-name token is present but glued MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a publisher's README contains `mcp-name: NAME` but the boundary-anchored match rejects it (the token is glued to a trailing character such as a sentence-ending period or `/`), the previous error said the name "must appear as 'mcp-name: NAME' in the package README" — which the publisher sees that it already does, giving no clue what's wrong. This is the failure mode of the registry-wide anchoring change, so the message needs to name the cause. Add mcpNameTokenGluedTrailing, which reports the offending trailing character when the literal token is present but unterminated, and use it in the PyPI, Cargo, and NuGet validators to emit an actionable message: "found 'mcp-name: NAME' but it is immediately followed by 'X' — put it on its own line and republish". NuGet gains a GluedReadme state to distinguish this from a genuinely-absent token. Tests: TestMCPNameTokenGluedTrailing for the helper, and a cargo combined-fixture case asserting the explanatory message on a glued trailing period. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/validators/registries/cargo.go | 6 ++++ internal/validators/registries/cargo_test.go | 12 ++++++++ internal/validators/registries/mcpname.go | 23 +++++++++++++++ .../registries/mcpname_internal_test.go | 28 +++++++++++++++++++ internal/validators/registries/nuget.go | 8 ++++++ internal/validators/registries/pypi.go | 6 ++++ 6 files changed, 83 insertions(+) diff --git a/internal/validators/registries/cargo.go b/internal/validators/registries/cargo.go index 10e4ba06..5aa72b10 100644 --- a/internal/validators/registries/cargo.go +++ b/internal/validators/registries/cargo.go @@ -331,5 +331,11 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri return nil } + // If the token IS present but glued to a trailing character, explain that + // rather than telling the publisher to add a token they can already see. + if trailing, glued := mcpNameTokenGluedTrailing(string(body), serverName); glued { + return fmt.Errorf("cargo package '%s' ownership validation failed: found 'mcp-name: %s' in the README, but it is immediately followed by %q rather than a boundary. The token must be followed by a space, newline, or an HTML tag — put it on its own line and publish a new version", pkg.Identifier, serverName, trailing) + } + return fmt.Errorf("cargo package '%s' ownership validation failed. 
The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName) } diff --git a/internal/validators/registries/cargo_test.go b/internal/validators/registries/cargo_test.go index 0006448a..5328f452 100644 --- a/internal/validators/registries/cargo_test.go +++ b/internal/validators/registries/cargo_test.go @@ -436,6 +436,18 @@ func TestValidateCargoCombinedFixture(t *testing.T) { wantErr: true, wantContains: []string{"ownership validation failed"}, }, + { + // Token present but glued to a trailing period — the error must explain + // the boundary cause, not tell the publisher to add a token they can see. + name: "glued_trailing_period_explained", + crateName: "combined-glued", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusOK, + readmeBody: fmt.Sprintf("

mcp-name: %s.

", serverName), + wantErr: true, + wantContains: []string{"immediately followed by", `"."`}, + }, } // lastMetaPath captures the metadata request path seen by the handler so diff --git a/internal/validators/registries/mcpname.go b/internal/validators/registries/mcpname.go index 4f04b0a2..527aefb1 100644 --- a/internal/validators/registries/mcpname.go +++ b/internal/validators/registries/mcpname.go @@ -72,3 +72,26 @@ func containsMCPNameToken(content, serverName string) bool { searchFrom = searchFrom + idx + 1 } } + +// mcpNameTokenGluedTrailing explains why a visibly-present token failed to match. +// When containsMCPNameToken has already returned false, it reports whether the +// literal "mcp-name: " string is nonetheless present and, if so, the +// character immediately following it — i.e. the trailing character that made the +// occurrence look like a prefix of a longer name rather than a complete token. +// Validators use it to turn an unhelpful "token must appear" message into an +// actionable "found it, but it's glued to %q — put it on its own line" message. +// Returns ("", false) when the literal token is absent (a genuinely missing token). +func mcpNameTokenGluedTrailing(content, serverName string) (trailing string, glued bool) { + token := "mcp-name: " + serverName + idx := strings.Index(content, token) + if idx < 0 { + return "", false + } + end := idx + len(token) + if end >= len(content) { + // A token at end-of-content is a valid boundary, so containsMCPNameToken + // would not have failed; be defensive and treat it as not-glued. 
+ return "", false + } + return string(content[end]), true +} diff --git a/internal/validators/registries/mcpname_internal_test.go b/internal/validators/registries/mcpname_internal_test.go index bbbf3482..cd0bc385 100644 --- a/internal/validators/registries/mcpname_internal_test.go +++ b/internal/validators/registries/mcpname_internal_test.go @@ -48,6 +48,34 @@ func TestContainsMCPNameToken(t *testing.T) { } } +// TestMCPNameTokenGluedTrailing covers the diagnostic helper that explains why a +// visibly-present token failed: it reports the trailing character gluing the +// token to a longer name, or ("", false) when the token is genuinely absent. +func TestMCPNameTokenGluedTrailing(t *testing.T) { + const name = "io.github.acme/widget" + cases := []struct { + desc string + content string + wantTrailing string + wantGlued bool + }{ + {"absent", "no token here", "", false}, + {"glued period", "mcp-name: io.github.acme/widget.", ".", true}, + {"glued hyphen (longer name)", "mcp-name: io.github.acme/widget-pro", "-", true}, + {"glued slash", "mcp-name: io.github.acme/widget/x", "/", true}, + {"at end of content (boundary, not glued)", "mcp-name: io.github.acme/widget", "", false}, + } + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + gotTrailing, gotGlued := mcpNameTokenGluedTrailing(tc.content, name) + if gotGlued != tc.wantGlued || gotTrailing != tc.wantTrailing { + t.Fatalf("mcpNameTokenGluedTrailing(%q) = (%q, %v), want (%q, %v)", + tc.content, gotTrailing, gotGlued, tc.wantTrailing, tc.wantGlued) + } + }) + } +} + // FuzzContainsMCPNameToken pins the core safety property of the boundary-anchored // matcher: it is strictly stricter than a bare substring check. A true result // must imply the literal token is present (strings.Contains). 
This guards against diff --git a/internal/validators/registries/nuget.go b/internal/validators/registries/nuget.go index f8f97d7a..c229ba02 100644 --- a/internal/validators/registries/nuget.go +++ b/internal/validators/registries/nuget.go @@ -51,6 +51,9 @@ type ReadmeState int const ( ValidReadme ReadmeState = iota InvalidReadme + // GluedReadme: the literal token is present but glued to a trailing character + // (so the boundary-anchored match rejected it as a prefix of a longer name). + GluedReadme NoReadme ) @@ -99,6 +102,8 @@ func ValidateNuGet(ctx context.Context, pkg model.Package, serverName string) er return nil case InvalidReadme: return fmt.Errorf("NuGet package '%s' ownership validation for version %s failed. The server name '%s' must appear as 'mcp-name: %s' in the package README. Add it to your README and publish a new package version", pkg.Identifier, pkg.Version, serverName, serverName) + case GluedReadme: + return fmt.Errorf("NuGet package '%s' ownership validation for version %s failed: found 'mcp-name: %s' in the README, but it is immediately followed by another character rather than a boundary. 
The token must be followed by a space, newline, an HTML tag, or a comment close ('-->') — put it on its own line and publish a new package version", pkg.Identifier, pkg.Version, serverName) case NoReadme: // Continue to check if package exists default: @@ -242,6 +247,9 @@ func validateReadme(ctx context.Context, serverName, lowerID, lowerVersion strin if containsMCPNameToken(readmeContent, serverName) { return ValidReadme, nil } + if _, glued := mcpNameTokenGluedTrailing(readmeContent, serverName); glued { + return GluedReadme, nil + } return InvalidReadme, nil } diff --git a/internal/validators/registries/pypi.go b/internal/validators/registries/pypi.go index 04068a3e..ec160bd9 100644 --- a/internal/validators/registries/pypi.go +++ b/internal/validators/registries/pypi.go @@ -90,5 +90,11 @@ func ValidatePyPI(ctx context.Context, pkg model.Package, serverName string) err return nil } + // If the token IS present but glued to a trailing character, say so — otherwise + // the publisher sees "must appear as mcp-name: X" while looking at exactly that. + if trailing, glued := mcpNameTokenGluedTrailing(description, serverName); glued { + return fmt.Errorf("PyPI package '%s' ownership validation failed: found 'mcp-name: %s' in the README, but it is immediately followed by %q rather than a boundary. The token must be followed by a space, newline, an HTML tag, or a comment close ('-->') — put it on its own line and republish", pkg.Identifier, serverName, trailing) + } + return fmt.Errorf("PyPI package '%s' ownership validation failed. The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName) }