Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 232 additions & 8 deletions packages/vinext/src/config/config-matchers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,19 +259,242 @@ function stripHopByHopRequestHeaders(headers: Headers): void {
}
}

/**
 * Report whether an unbounded quantifier starts at `idx` in `pattern`.
 *
 * Unbounded means `*`, `+`, or the open-ended brace form `{n,}` where `n` is
 * one or more decimal digits. Bounded forms (`{n}`, `{n,m}`) and the optional
 * `?` return false because they cannot drive exponential backtracking.
 *
 * A brace with no lower bound (`{,}`) also returns false: JavaScript does not
 * parse it as a quantifier (without the `u`/`v` flag it is literal text, with
 * them it is a syntax error), so it never repeats anything.
 *
 * @param pattern Regex source text being scanned.
 * @param idx Index of the character immediately after the quantified atom.
 *   An out-of-range index simply yields false.
 * @returns true only for `*`, `+`, or `{digits,}` at `idx`.
 */
function isUnboundedQuantifierAt(pattern: string, idx: number): boolean {
  const c = pattern[idx];
  if (c === "*" || c === "+") return true;
  if (c !== "{") return false;

  const digitsStart = idx + 1;
  let j = digitsStart;
  while (j < pattern.length && pattern[j] >= "0" && pattern[j] <= "9") j++;

  // A real quantifier needs a lower bound; `{,}` is just the literal text "{,}".
  if (j === digitsStart) return false;

  // `{n,}` (comma immediately followed by `}`) is unbounded; `{n}`/`{n,m}` are not.
  return pattern[j] === "," && pattern[j + 1] === "}";
}

/**
 * Strip a group-body prefix so the remaining text is a plain alternation.
 *
 * `groupBody` is the text between a group's parentheses. Prefixes that merely
 * label or configure a consuming group are removed, because the group still
 * repeats its body under a quantifier exactly like a plain `( … )`:
 *
 * - no prefix            → returned unchanged
 * - `?:`                 → non-capturing group, prefix removed
 * - `?<name>`            → named capture group, prefix removed
 * - `?i:`, `?-i:`, `?i-m:` → modifier (inline-flag) group, prefix removed
 *
 * Lookarounds (`?=`, `?!`, `?<=`, `?<!`) and any other `?`-prefixed form
 * return null: they are zero-width assertions (or unrecognised syntax), so
 * the caller skips them rather than analysing them textually.
 *
 * @param groupBody Text between a group's `(` and its matching `)`.
 * @returns The alternation text without its prefix, or null when the group
 *   should not be analysed.
 */
function alternationBodyOf(groupBody: string): string | null {
  if (!groupBody.startsWith("?")) return groupBody;
  if (groupBody.startsWith("?:")) return groupBody.slice(2);

  // Named capture `?<name>` — the negative lookahead excludes the lookbehind
  // forms `?<=` and `?<!`, which must stay unanalysed.
  const namedPrefix = /^\?<(?![=!])[^>]*>/.exec(groupBody);
  if (namedPrefix !== null) return groupBody.slice(namedPrefix[0].length);

  // Modifier group `?flags:` / `?flags-flags:` / `?-flags:`. The mandatory
  // trailing `:` keeps lookaheads (`?=`, `?!`) from matching here.
  const modifierPrefix = /^\?[a-z]*(?:-[a-z]*)?:/.exec(groupBody);
  if (modifierPrefix !== null) return groupBody.slice(modifierPrefix[0].length);

  return null;
}

/**
 * Break a group body apart at every `|` that sits at nesting depth 0.
 *
 * Escaped characters, character classes (`[...]`) and nested groups are
 * carried through verbatim; a `|` inside any of them is not a separator.
 * The result always has at least one element (an empty body yields `[""]`).
 */
function splitTopLevelAlternatives(body: string): string[] {
  const pieces: string[] = [];
  let pieceStart = 0;
  let nesting = 0;
  let insideClass = false;
  let pos = 0;

  while (pos < body.length) {
    const c = body[pos];

    // An escape swallows the following character, whatever it is.
    if (c === "\\") {
      pos += 2;
      continue;
    }

    // Inside a class only the closing bracket is meaningful.
    if (insideClass) {
      insideClass = c !== "]";
      pos++;
      continue;
    }

    switch (c) {
      case "[":
        insideClass = true;
        break;
      case "(":
        nesting++;
        break;
      case ")":
        nesting--;
        break;
      case "|":
        if (nesting === 0) {
          pieces.push(body.slice(pieceStart, pos));
          pieceStart = pos + 1;
        }
        break;
    }
    pos++;
  }

  // Whatever follows the last separator (possibly nothing) is the final branch.
  pieces.push(body.slice(pieceStart));
  return pieces;
}

/**
 * Conservative overlap test for a list of alternation branches.
 *
 * Branches are whitespace-trimmed and empty ones are discarded. Two of the
 * remaining branches overlap when they are identical or when the shorter one
 * is a textual prefix of the longer one (`a|a`, `a|ab`, `\d|\d\d`).
 * Alternations of distinct tokens such as `foo|bar` or `GET|POST` are not
 * reported.
 */
function hasOverlappingAlternatives(branches: readonly string[]): boolean {
  const candidates: string[] = [];
  for (const raw of branches) {
    const text = raw.trim();
    if (text !== "") candidates.push(text);
  }

  // Compare each candidate against every candidate that follows it.
  return candidates.some((first, idx) =>
    candidates
      .slice(idx + 1)
      .some((second) =>
        first.length >= second.length ? first.startsWith(second) : second.startsWith(first),
      ),
  );
}

/**
 * Peel the outer parentheses off `branch` when the whole branch is exactly one
 * parenthesised group.
 *
 * After trimming surrounding whitespace, the branch qualifies only if it
 * starts with `(` and that same parenthesis is closed by the final `)`. A
 * branch like `(a)(b)` does not qualify, because the first group closes
 * before the end. Escapes and character classes are skipped while matching
 * parentheses.
 *
 * Only the parentheses are removed: a prefix such as `?:` is left in the
 * returned text for the caller to normalise (e.g. `(?:a|b)` yields `?:a|b`).
 *
 * @returns The text between the outer parentheses, or null when the branch is
 *   not a single wrapping group.
 */
function unwrapSoleGroup(branch: string): string | null {
  const text = branch.trim();
  const lastIdx = text.length - 1;

  const looksWrapped = text.length >= 2 && text.startsWith("(") && text.endsWith(")");
  if (!looksWrapped) return null;

  let open = 0;
  let insideClass = false;
  let pos = 0;
  while (pos <= lastIdx) {
    const c = text[pos];

    // Skip the escaped character along with its backslash.
    if (c === "\\") {
      pos += 2;
      continue;
    }

    if (insideClass) {
      if (c === "]") insideClass = false;
    } else if (c === "[") {
      insideClass = true;
    } else if (c === "(") {
      open++;
    } else if (c === ")") {
      open--;
      // Returning to depth 0 before the last character means the leading
      // group ended early, so it does not span the whole branch.
      if (open === 0 && pos !== lastIdx) return null;
    }
    pos++;
  }

  // Unbalanced text (e.g. the trailing `)` was escaped) is not a sole group.
  return open === 0 ? text.slice(1, -1) : null;
}

/**
 * Report whether the body of a quantified group contains an overlapping
 * alternation, looking through redundant wrapper groups.
 *
 * The body is split into its top-level alternatives. If at least two of them
 * overlap, the answer is true. Otherwise every alternative that is, as a
 * whole, one parenthesised group is unwrapped, its prefix normalised, and
 * examined the same way. This covers shapes such as:
 *
 *   - `((a|a))*`   — the body is the sole group `(a|a)`
 *   - `((a|a)|x)*` — the first alternative is an overlapping group
 *   - `(x|(a|a))*` — the last alternative is an overlapping group
 *
 * Alternatives whose unwrapped form is a lookaround or another unanalysed
 * group (normalisation yields null) are skipped. Disjoint alternations like
 * `((ab|cd))*` unwrap to non-overlapping branches and are reported as false.
 *
 * @param body Group body with any `?:`-style prefix already removed.
 * @param depth Current unwrap depth; beyond 32 levels the scan stops and
 *   returns false so adversarial nesting cannot make it expensive.
 */
function bodyHasOverlappingAlternation(body: string, depth = 0): boolean {
  // Hard ceiling on how many wrapper layers are peeled.
  if (depth > 32) return false;

  const alternatives = splitTopLevelAlternatives(body);
  const directOverlap = alternatives.length >= 2 && hasOverlappingAlternatives(alternatives);
  if (directOverlap) return true;

  // No overlap at this level: descend into alternatives that are themselves a
  // single wrapping group. This also handles a lone alternative, for which the
  // overlap check above cannot fire.
  return alternatives.some((alternative) => {
    const peeled = unwrapSoleGroup(alternative);
    const inner = peeled === null ? null : alternationBodyOf(peeled);
    return inner !== null && bodyHasOverlappingAlternation(inner, depth + 1);
  });
}

/**
 * Detect an alternation with overlapping branches that is repeated by an
 * UNBOUNDED quantifier, e.g. `(a|a)*`, `(a|ab)+`, `(\d|\d\d){2,}`.
 *
 * This complements a nested-quantifier check: such a group contains no inner
 * quantifier, yet still backtracks exponentially because the ambiguous
 * alternatives give the engine multiple ways to consume the same input on
 * every repetition (`(a|a)*` against `"aaaa…!"` is exponential).
 *
 * The overlapping group does not have to be the quantified group itself.
 * Every group nested anywhere inside an unbounded-quantified group is repeated
 * along with it, so `(x(a|a))*` and `(?:/(a|a))+` are just as exponential as
 * `(a|a)*`. When a group closes with an unbounded quantifier, that group and
 * every group nested inside it are therefore analysed.
 *
 * The check is deliberately conservative: groups nested inside a quantified
 * lookaround are examined too, even though the lookaround's own body is not.
 *
 * @param pattern Regex source text.
 * @returns true when an overlapping alternation is found under an unbounded
 *   quantifier.
 */
function hasAmbiguousUnboundedAlternation(pattern: string): boolean {
  // Body-start index of every group that is currently open.
  const groupStartStack: number[] = [];
  // Closed groups not yet analysed, in closing order. Groups nested inside a
  // given group always form the tail of this list at the moment it closes.
  const pendingGroups: Array<{ bodyStart: number; bodyEnd: number }> = [];
  let inClass = false;

  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern[i];
    if (ch === "\\") {
      // Skip the escaped character; it can never open or close a group.
      i++;
      continue;
    }
    if (inClass) {
      if (ch === "]") inClass = false;
      continue;
    }
    if (ch === "[") {
      inClass = true;
      continue;
    }
    if (ch === "(") {
      groupStartStack.push(i + 1);
      continue;
    }
    if (ch === ")") {
      const start = groupStartStack.pop();
      // Stray `)` with no matching `(` — nothing to analyse.
      if (start === undefined) continue;

      pendingGroups.push({ bodyStart: start, bodyEnd: i });
      // Not repeated without bound here; keep the group pending in case an
      // enclosing group turns out to be.
      if (!isUnboundedQuantifierAt(pattern, i + 1)) continue;

      // Drain this group and everything nested inside it (the entries whose
      // body starts at or after this group's body). Each group is analysed at
      // most once across the whole scan.
      let span = pendingGroups.pop();
      while (span !== undefined && span.bodyStart >= start) {
        const body = alternationBodyOf(pattern.slice(span.bodyStart, span.bodyEnd));
        // Lookarounds and other unanalysed groups yield null and are skipped.
        // The body is analysed recursively so wrapped alternations such as
        // `((a|a))*` are caught as well.
        if (body !== null && bodyHasOverlappingAlternation(body)) return true;
        span = pendingGroups.pop();
      }
      // The last popped entry (if any) lies outside this group; put it back.
      if (span !== undefined) pendingGroups.push(span);
    }
  }
  return false;
}

/**
* Detect regex patterns vulnerable to catastrophic backtracking (ReDoS).
*
* Uses a lightweight heuristic: scans the pattern string for nested quantifiers
* (a quantifier applied to a group that itself contains a quantifier). This
* catches the most common pathological patterns like `(a+)+`, `(.*)*`,
* `([^/]+)+`, `(a|a+)+` without needing a full regex parser.
* Uses a lightweight heuristic without a full regex parser. Two pathological
* shapes are rejected:
*
* 1. Nested quantifiers — a quantifier applied to a group that itself
* contains a quantifier, e.g. `(a+)+`, `(.*)*`, `([^/]+)+`, `(a|a+)+`.
* 2. An overlapping alternation under an UNBOUNDED quantifier, e.g. `(a|a)*`,
* `(a|ab)+`, `(\d|\d\d){2,}`. These contain no inner quantifier but still
* backtrack exponentially because the ambiguous branches give the engine
* multiple ways to consume the same input on each repetition.
*
* Returns true if the pattern appears safe, false if it's potentially dangerous.
*/
export function isSafeRegex(pattern: string): boolean {
// Track parenthesis nesting depth and whether we've seen a quantifier
// at each depth level.
// (2) Ambiguous alternation repeated by an unbounded quantifier.
if (hasAmbiguousUnboundedAlternation(pattern)) return false;

// (1) Nested quantifiers — track parenthesis nesting depth and whether we've
// seen a quantifier at each depth level.
const quantifierAtDepth: boolean[] = [];
let depth = 0;
let i = 0;
Expand Down Expand Up @@ -383,8 +606,9 @@ export function safeRegExp(pattern: string, flags?: string): RegExp | null {
if (!isSafeRegex(pattern)) {
console.warn(
`[vinext] Ignoring potentially unsafe regex pattern (ReDoS risk): ${pattern}\n` +
` Patterns with nested quantifiers (e.g. (a+)+) can cause catastrophic backtracking.\n` +
` Simplify the pattern to avoid nested repetition.`,
` Nested quantifiers (e.g. (a+)+) and overlapping alternations repeated by an\n` +
` unbounded quantifier (e.g. (a|a)*, (a|ab)+) can cause catastrophic backtracking.\n` +
` Simplify the pattern to avoid nested repetition and ambiguous alternatives.`,
);
return null;
}
Expand Down
90 changes: 90 additions & 0 deletions tests/shims.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9057,6 +9057,96 @@ describe("isSafeRegex", () => {
expect(isSafeRegex("(foo|bar)*")).toBe(true);
});

// Overlapping alternation under an unbounded quantifier — no inner quantifier,
// but still exponential because the ambiguous branches give the engine
// multiple ways to consume the same input on each repetition. `(a|a)*` matched
// against "aaaa…!" takes seconds at ~30 chars. These complement the
// nested-quantifier checks above (which `(a|a)*` slips past).
it("rejects overlapping alternation repeated by *: (a|a)*", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|a)*")).toBe(false);
expect(isSafeRegex("(a|a)*$")).toBe(false);
expect(isSafeRegex("^(a|a)*!")).toBe(false);
});

it("rejects overlapping alternation repeated by +: (a|a|a)+, (foo|foo)+", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|a|a)+")).toBe(false);
expect(isSafeRegex("(foo|foo)+")).toBe(false);
});

it("rejects prefix-overlapping alternation: (a|ab)*, (\\d|\\d\\d)+", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|ab)*")).toBe(false);
expect(isSafeRegex("(\\d|\\d\\d)+")).toBe(false);
expect(isSafeRegex("(ab|abc)+")).toBe(false);
});

it("rejects overlapping alternation repeated by unbounded brace: (a|a){2,}", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|a){2,}")).toBe(false);
});

it("rejects overlapping alternation in a non-capturing group: (?:a|a)*", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(?:a|a)*")).toBe(false);
});

it("accepts disjoint alternation under a quantifier (no overlap)", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
// Distinct-token alternations are unambiguous and safe.
expect(isSafeRegex("(foo|bar)*")).toBe(true);
expect(isSafeRegex("(foo|bar|baz)+")).toBe(true);
expect(isSafeRegex("(en|fr|de)*")).toBe(true);
expect(isSafeRegex("(a|b)*")).toBe(true);
expect(isSafeRegex("(GET|POST|PUT)+")).toBe(true);
});

it("accepts overlapping alternation only under BOUNDED repetition", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
// Bounded repetition cannot blow up exponentially.
expect(isSafeRegex("(a|a){2,5}")).toBe(true);
expect(isSafeRegex("(a|a){3}")).toBe(true);
// Optional (zero-or-one) is only 2 paths.
expect(isSafeRegex("(a|a)?")).toBe(true);
// No quantifier at all.
expect(isSafeRegex("(a|a)")).toBe(true);
});

// Wrapping an overlapping alternation in a redundant group must NOT let it
// slip past the unbounded-quantifier check. These are trivial transforms of
// `(a|a)*` and remain exponential (~7s on ~26 chars). Without unwrapping the
// sole/nested child group, the top-level split sees a single non-overlapping
// branch and rates them safe.
it("rejects a wrapped overlapping alternation: ((a|a))*", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("((a|a))*")).toBe(false);
expect(isSafeRegex("((a|a))+")).toBe(false);
expect(isSafeRegex("((a|a)){2,}")).toBe(false);
// Multiple redundant wrapper layers and a non-capturing wrapper.
expect(isSafeRegex("(((a|a)))*")).toBe(false);
expect(isSafeRegex("(?:(?:a|a))*")).toBe(false);
});

it("rejects a nested overlapping alternation branch: ((a|a)|x)* and (x|(a|a))*", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("((a|a)|x)*")).toBe(false);
expect(isSafeRegex("(x|(a|a))*")).toBe(false);
// Prefix-overlap nested in a branch, repeated by +.
expect(isSafeRegex("(x|(a|ab)|y)+")).toBe(false);
});

it("keeps disjoint wrapped/nested alternations SAFE (no false positives)", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
// Distinct-token branches are unambiguous even when wrapped or nested.
expect(isSafeRegex("(ab|cd)*")).toBe(true);
expect(isSafeRegex("((ab|cd))*")).toBe(true);
expect(isSafeRegex("((ab|cd)|ef)*")).toBe(true);
expect(isSafeRegex("(x|(ab|cd))*")).toBe(true);
// A wrapped locale alternation (common in real config) stays safe.
expect(isSafeRegex("((en|fr|de))*")).toBe(true);
});

it("treats escaped characters as safe", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
// \\+ is a literal +, not a quantifier
Expand Down
Loading