Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 142 additions & 8 deletions packages/vinext/src/config/config-matchers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,19 +259,152 @@ function stripHopByHopRequestHeaders(headers: Headers): void {
}
}

/**
* Read an unbounded quantifier at `idx`. Returns true for `*`, `+`, and the
* open-ended brace form `{n,}`. Bounded forms (`{n}`, `{n,m}`) and the optional
* `?` are NOT unbounded and so cannot drive exponential backtracking.
*/
function isUnboundedQuantifierAt(pattern: string, idx: number): boolean {
const c = pattern[idx];
if (c === "*" || c === "+") return true;
if (c !== "{") return false;
let j = idx + 1;
while (j < pattern.length && pattern[j] >= "0" && pattern[j] <= "9") j++;
// `{n,}` (comma immediately followed by `}`) is unbounded; `{n}`/`{n,m}` are not.
return pattern[j] === "," && pattern[j + 1] === "}";
}

/**
* Strip a group-body prefix so the remaining text is a plain alternation.
* Non-capturing groups (`?:`) are unwrapped. Lookarounds, named groups, and
* inline-flag groups (`?=`, `?!`, `?<=`, `?<name>`, `?i:`, …) are skipped
* (returns null) — analysing them textually would be unreliable, so we simply
* don't flag them here (the nested-quantifier pass still applies).
*/
function alternationBodyOf(groupBody: string): string | null {
if (!groupBody.startsWith("?")) return groupBody;
if (groupBody.startsWith("?:")) return groupBody.slice(2);
return null;
}

/** Split a group body into its top-level (depth-0) `|` alternatives. */
function splitTopLevelAlternatives(body: string): string[] {
const branches: string[] = [];
let current = "";
let depth = 0;
let inClass = false;
for (let i = 0; i < body.length; i++) {
const ch = body[i];
if (ch === "\\") {
current += ch + (body[i + 1] ?? "");
i++;
continue;
}
if (inClass) {
current += ch;
if (ch === "]") inClass = false;
continue;
}
if (ch === "[") {
inClass = true;
current += ch;
continue;
}
if (ch === "(") depth++;
else if (ch === ")") depth--;
else if (ch === "|" && depth === 0) {
branches.push(current);
current = "";
continue;
}
current += ch;
}
branches.push(current);
return branches;
}

/**
* Conservatively detect overlapping alternatives. Two branches overlap when one
* is equal to, or a textual prefix of, the other (e.g. `a|a`, `a|ab`,
* `\d|\d\d`). Distinct-token alternations like `foo|bar` or `GET|POST` do not
* overlap and are left alone.
*/
function hasOverlappingAlternatives(branches: readonly string[]): boolean {
const trimmed = branches.map((b) => b.trim()).filter((b) => b.length > 0);
for (let a = 0; a < trimmed.length; a++) {
for (let b = a + 1; b < trimmed.length; b++) {
const longer = trimmed[a].length >= trimmed[b].length ? trimmed[a] : trimmed[b];
const shorter = trimmed[a].length >= trimmed[b].length ? trimmed[b] : trimmed[a];
if (longer.startsWith(shorter)) return true;
}
}
return false;
}

/**
* Detect an alternation with overlapping branches that is directly repeated by
* an UNBOUNDED quantifier, e.g. `(a|a)*`, `(a|ab)+`, `(\d|\d\d){2,}`.
*
* This is the complement of the nested-quantifier check below: such a group
* contains no inner quantifier, yet still backtracks exponentially because the
* ambiguous alternatives give the engine multiple ways to consume the same
* input on every repetition (`(a|a)*` against `"aaaa…!"` is exponential).
*/
function hasAmbiguousUnboundedAlternation(pattern: string): boolean {
const groupStartStack: number[] = [];
let inClass = false;
for (let i = 0; i < pattern.length; i++) {
const ch = pattern[i];
if (ch === "\\") {
i++;
continue;
}
if (inClass) {
if (ch === "]") inClass = false;
continue;
}
if (ch === "[") {
inClass = true;
continue;
}
if (ch === "(") {
groupStartStack.push(i + 1);
continue;
}
if (ch === ")") {
const start = groupStartStack.pop();
if (start === undefined) continue;
if (!isUnboundedQuantifierAt(pattern, i + 1)) continue;
const body = alternationBodyOf(pattern.slice(start, i));
if (body === null) continue;
const branches = splitTopLevelAlternatives(body);
if (branches.length >= 2 && hasOverlappingAlternatives(branches)) return true;
}
}
return false;
}

/**
* Detect regex patterns vulnerable to catastrophic backtracking (ReDoS).
*
* Uses a lightweight heuristic: scans the pattern string for nested quantifiers
* (a quantifier applied to a group that itself contains a quantifier). This
* catches the most common pathological patterns like `(a+)+`, `(.*)*`,
* `([^/]+)+`, `(a|a+)+` without needing a full regex parser.
* Uses a lightweight heuristic without a full regex parser. Two pathological
* shapes are rejected:
*
* 1. Nested quantifiers — a quantifier applied to a group that itself
* contains a quantifier, e.g. `(a+)+`, `(.*)*`, `([^/]+)+`, `(a|a+)+`.
* 2. An overlapping alternation under an UNBOUNDED quantifier, e.g. `(a|a)*`,
* `(a|ab)+`, `(\d|\d\d){2,}`. These contain no inner quantifier but still
* backtrack exponentially because the ambiguous branches give the engine
* multiple ways to consume the same input on each repetition.
*
* Returns true if the pattern appears safe, false if it's potentially dangerous.
*/
export function isSafeRegex(pattern: string): boolean {
// Track parenthesis nesting depth and whether we've seen a quantifier
// at each depth level.
// (2) Ambiguous alternation repeated by an unbounded quantifier.
if (hasAmbiguousUnboundedAlternation(pattern)) return false;

// (1) Nested quantifiers — track parenthesis nesting depth and whether we've
// seen a quantifier at each depth level.
const quantifierAtDepth: boolean[] = [];
let depth = 0;
let i = 0;
Expand Down Expand Up @@ -383,8 +516,9 @@ export function safeRegExp(pattern: string, flags?: string): RegExp | null {
if (!isSafeRegex(pattern)) {
console.warn(
`[vinext] Ignoring potentially unsafe regex pattern (ReDoS risk): ${pattern}\n` +
` Patterns with nested quantifiers (e.g. (a+)+) can cause catastrophic backtracking.\n` +
` Simplify the pattern to avoid nested repetition.`,
` Nested quantifiers (e.g. (a+)+) and overlapping alternations repeated by an\n` +
` unbounded quantifier (e.g. (a|a)*, (a|ab)+) can cause catastrophic backtracking.\n` +
` Simplify the pattern to avoid nested repetition and ambiguous alternatives.`,
);
return null;
}
Expand Down
56 changes: 56 additions & 0 deletions tests/shims.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9057,6 +9057,62 @@ describe("isSafeRegex", () => {
expect(isSafeRegex("(foo|bar)*")).toBe(true);
});

// Overlapping alternation under an unbounded quantifier — no inner quantifier,
// but still exponential because the ambiguous branches give the engine
// multiple ways to consume the same input on each repetition. `(a|a)*` matched
// against "aaaa…!" takes seconds at ~30 chars. These complement the
// nested-quantifier checks above (which `(a|a)*` slips past).
it("rejects overlapping alternation repeated by *: (a|a)*", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|a)*")).toBe(false);
expect(isSafeRegex("(a|a)*$")).toBe(false);
expect(isSafeRegex("^(a|a)*!")).toBe(false);
});

it("rejects overlapping alternation repeated by +: (a|a|a)+, (foo|foo)+", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|a|a)+")).toBe(false);
expect(isSafeRegex("(foo|foo)+")).toBe(false);
});

it("rejects prefix-overlapping alternation: (a|ab)*, (\\d|\\d\\d)+", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|ab)*")).toBe(false);
expect(isSafeRegex("(\\d|\\d\\d)+")).toBe(false);
expect(isSafeRegex("(ab|abc)+")).toBe(false);
});

it("rejects overlapping alternation repeated by unbounded brace: (a|a){2,}", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(a|a){2,}")).toBe(false);
});

it("rejects overlapping alternation in a non-capturing group: (?:a|a)*", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
expect(isSafeRegex("(?:a|a)*")).toBe(false);
});

it("accepts disjoint alternation under a quantifier (no overlap)", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
// Distinct-token alternations are unambiguous and safe.
expect(isSafeRegex("(foo|bar)*")).toBe(true);
expect(isSafeRegex("(foo|bar|baz)+")).toBe(true);
expect(isSafeRegex("(en|fr|de)*")).toBe(true);
expect(isSafeRegex("(a|b)*")).toBe(true);
expect(isSafeRegex("(GET|POST|PUT)+")).toBe(true);
});

it("accepts overlapping alternation only under BOUNDED repetition", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
// Bounded repetition cannot blow up exponentially.
expect(isSafeRegex("(a|a){2,5}")).toBe(true);
expect(isSafeRegex("(a|a){3}")).toBe(true);
// Optional (zero-or-one) is only 2 paths.
expect(isSafeRegex("(a|a)?")).toBe(true);
// No quantifier at all.
expect(isSafeRegex("(a|a)")).toBe(true);
});

it("treats escaped characters as safe", async () => {
const { isSafeRegex } = await import("../packages/vinext/src/config/config-matchers.js");
// \\+ is a literal +, not a quantifier
Expand Down
Loading