diff --git a/.changeset/fence-aware-delta-parsing.md b/.changeset/fence-aware-delta-parsing.md new file mode 100644 index 000000000..a9237952f --- /dev/null +++ b/.changeset/fence-aware-delta-parsing.md @@ -0,0 +1,7 @@ +--- +"@fission-ai/openspec": patch +--- + +### Fixed + +- Ignore Markdown structure (requirement headers, delta sections, scenarios, REMOVED/RENAMED entries) that appears inside fenced code blocks when parsing delta specs. Previously a fenced `### Requirement:` example was parsed as a real (phantom) requirement, producing spurious `validate` errors and risking incorrect `archive` output. Fenced-code detection is now shared across the Markdown parsers so `validate` and `archive` behave consistently. diff --git a/src/core/parsers/code-fence.ts b/src/core/parsers/code-fence.ts new file mode 100644 index 000000000..bb580c2d8 --- /dev/null +++ b/src/core/parsers/code-fence.ts @@ -0,0 +1,62 @@ +/** + * Shared fenced-code-block detection for the Markdown parsers. + * + * Several parsers need to ignore Markdown structure (headers, requirement + * blocks, scenarios, delta sections) that appears inside fenced code blocks. + * Keeping this logic in one place avoids the drift that previously left + * `requirement-blocks.ts` treating fenced `### Requirement:` lines as real + * requirements during validation and archiving. + */ + +interface ActiveFence { + marker: '`' | '~'; + length: number; +} + +function getFenceMarker(line: string): ActiveFence | null { + const fenceMatch = line.match(/^\s*(`{3,}|~{3,})/); + if (!fenceMatch) { + return null; + } + + return { + marker: fenceMatch[1][0] as '`' | '~', + length: fenceMatch[1].length, + }; +} + +function isClosingFence(line: string, activeFence: ActiveFence): boolean { + const fenceMatch = line.match(/^\s*(`{3,}|~{3,})\s*$/); + return Boolean( + fenceMatch && + fenceMatch[1][0] === activeFence.marker && + fenceMatch[1].length >= activeFence.length + ); +} + +/** + * Builds a per-line mask where `true` marks a line that is part of a fenced + * code block (including the opening and closing fence lines themselves). + */ +export function buildCodeFenceMask(lines: string[]): boolean[] { + const mask = new Array(lines.length).fill(false); + let activeFence: ActiveFence | null = null; + + for (let i = 0; i < lines.length; i++) { + if (!activeFence) { + const fence = getFenceMarker(lines[i]); + if (fence) { + activeFence = fence; + mask[i] = true; + } + continue; + } + + mask[i] = true; + if (isClosingFence(lines[i], activeFence)) { + activeFence = null; + } + } + + return mask; +} diff --git a/src/core/parsers/markdown-parser.ts b/src/core/parsers/markdown-parser.ts index abad78df2..e44f7fdde 100644 --- a/src/core/parsers/markdown-parser.ts +++ b/src/core/parsers/markdown-parser.ts @@ -1,4 +1,5 @@ import { Spec, Change, Requirement, Scenario, Delta, DeltaOperation } from '../schemas/index.js'; +import { buildCodeFenceMask } from './code-fence.js'; export interface Section { level: number; @@ -24,51 +25,7 @@ export class MarkdownParser { } protected static buildCodeFenceMask(lines: string[]): boolean[] { - const mask = new Array(lines.length).fill(false); - let activeFence: { marker: '`' | '~'; length: number } | null = null; - - for (let i = 0; i < lines.length; i++) { - const fence = MarkdownParser.getFenceMarker(lines[i]); - - if (!activeFence) { - if (fence) { - activeFence = fence; - mask[i] = true; - } - continue; - } - - mask[i] = true; - if (MarkdownParser.isClosingFence(lines[i], activeFence)) { - activeFence = null; - } - } - - return mask; - } - - private static getFenceMarker(line: string): { marker: '`' | '~'; length: number } | null { - const fenceMatch = line.match(/^\s*(`{3,}|~{3,})/); - if (!fenceMatch) { - return null; - } - - return { - marker: fenceMatch[1][0] as '`' | '~', - length: fenceMatch[1].length, - }; - } - - private static isClosingFence( - line: string, - activeFence: { marker: '`' | '~'; length: number } - ): boolean { - const fenceMatch = line.match(/^\s*(`{3,}|~{3,})\s*$/); - return Boolean( - fenceMatch && - fenceMatch[1][0] === activeFence.marker && - fenceMatch[1].length >= activeFence.length - ); + return buildCodeFenceMask(lines); } parseSpec(name: string): Spec { diff --git a/src/core/parsers/requirement-blocks.ts b/src/core/parsers/requirement-blocks.ts index afc55f891..ab0e4cc04 100644 --- a/src/core/parsers/requirement-blocks.ts +++ b/src/core/parsers/requirement-blocks.ts @@ -1,3 +1,5 @@ +import { buildCodeFenceMask } from './code-fence.js'; + export interface RequirementBlock { headerLine: string; // e.g., '### Requirement: Something' name: string; // e.g., 'Something' @@ -24,7 +26,8 @@ const REQUIREMENT_HEADER_REGEX = /^###\s*Requirement:\s*(.+)\s*$/i; export function extractRequirementsSection(content: string): RequirementsSectionParts { const normalized = normalizeLineEndings(content); const lines = normalized.split('\n'); - const reqHeaderIndex = lines.findIndex(l => /^##\s+Requirements\s*$/i.test(l)); + const fenceMask = buildCodeFenceMask(lines); + const reqHeaderIndex = lines.findIndex((l, i) => !fenceMask[i] && /^##\s+Requirements\s*$/i.test(l)); if (reqHeaderIndex === -1) { // No requirements section; create an empty one at the end @@ -42,7 +45,7 @@ export function extractRequirementsSection(content: string): RequirementsSection // Find end of this section: next line that starts with '## ' at same or higher level let endIndex = lines.length; for (let i = reqHeaderIndex + 1; i < lines.length; i++) { - if (/^##\s+/.test(lines[i])) { + if (!fenceMask[i] && /^##\s+/.test(lines[i])) { endIndex = i; break; } @@ -51,6 +54,11 @@ export function extractRequirementsSection(content: string): RequirementsSection const before = lines.slice(0, reqHeaderIndex).join('\n'); const headerLine = lines[reqHeaderIndex]; const sectionBodyLines = lines.slice(reqHeaderIndex + 1, endIndex); + const sectionBodyMask = fenceMask.slice(reqHeaderIndex + 1, endIndex); + const isRequirementHeader = (cursor: number): boolean => + !sectionBodyMask[cursor] && REQUIREMENT_HEADER_REGEX.test(sectionBodyLines[cursor]); + const isTopLevelHeader = (cursor: number): boolean => + !sectionBodyMask[cursor] && /^##\s+/.test(sectionBodyLines[cursor]); // Parse requirement blocks within section body const blocks: RequirementBlock[] = []; @@ -58,25 +66,24 @@ export function extractRequirementsSection(content: string): RequirementsSection let preambleLines: string[] = []; // Collect preamble lines until first requirement header - while (cursor < sectionBodyLines.length && !REQUIREMENT_HEADER_REGEX.test(sectionBodyLines[cursor])) { + while (cursor < sectionBodyLines.length && !isRequirementHeader(cursor)) { preambleLines.push(sectionBodyLines[cursor]); cursor++; } while (cursor < sectionBodyLines.length) { - const headerStart = cursor; const headerLineCandidate = sectionBodyLines[cursor]; - const headerMatch = headerLineCandidate.match(REQUIREMENT_HEADER_REGEX); - if (!headerMatch) { + if (!isRequirementHeader(cursor)) { // Not a requirement header; skip line defensively cursor++; continue; } + const headerMatch = headerLineCandidate.match(REQUIREMENT_HEADER_REGEX)!; const name = normalizeRequirementName(headerMatch[1]); cursor++; // Gather lines until next requirement header or end of section const bodyLines: string[] = [headerLineCandidate]; - while (cursor < sectionBodyLines.length && !REQUIREMENT_HEADER_REGEX.test(sectionBodyLines[cursor]) && !/^##\s+/.test(sectionBodyLines[cursor])) { + while (cursor < sectionBodyLines.length && !isRequirementHeader(cursor) && !isTopLevelHeader(cursor)) { bodyLines.push(sectionBodyLines[cursor]); cursor++; } @@ -113,12 +120,24 @@ function normalizeLineEndings(content: string): string { return content.replace(/\r\n?/g, '\n'); } +/** + * A slice of a document represented as its lines plus a parallel mask marking + * lines that live inside fenced code blocks (which must be ignored when + * detecting Markdown structure). + */ +interface SectionBody { + lines: string[]; + fenceMask: boolean[]; +} + /** * Parse a delta-formatted spec change file content into a DeltaPlan with raw blocks. */ export function parseDeltaSpec(content: string): DeltaPlan { const normalized = normalizeLineEndings(content); - const sections = splitTopLevelSections(normalized); + const lines = normalized.split('\n'); + const fenceMask = buildCodeFenceMask(lines); + const sections = splitTopLevelSections(lines, fenceMask); const addedLookup = getSectionCaseInsensitive(sections, 'ADDED Requirements'); const modifiedLookup = getSectionCaseInsensitive(sections, 'MODIFIED Requirements'); const removedLookup = getSectionCaseInsensitive(sections, 'REMOVED Requirements'); @@ -141,50 +160,55 @@ export function parseDeltaSpec(content: string): DeltaPlan { }; } -function splitTopLevelSections(content: string): Record { - const lines = content.split('\n'); - const result: Record = {}; - const indices: Array<{ title: string; index: number; level: number }> = []; +function splitTopLevelSections(lines: string[], fenceMask: boolean[]): Record { + const result: Record = {}; + const indices: Array<{ title: string; index: number }> = []; for (let i = 0; i < lines.length; i++) { + if (fenceMask[i]) continue; const m = lines[i].match(/^(##)\s+(.+)$/); if (m) { - const level = m[1].length; // only care for '##' - indices.push({ title: m[2].trim(), index: i, level }); + indices.push({ title: m[2].trim(), index: i }); } } for (let i = 0; i < indices.length; i++) { const current = indices[i]; const next = indices[i + 1]; - const body = lines.slice(current.index + 1, next ? next.index : lines.length).join('\n'); - result[current.title] = body; + const end = next ? next.index : lines.length; + result[current.title] = { + lines: lines.slice(current.index + 1, end), + fenceMask: fenceMask.slice(current.index + 1, end), + }; } return result; } -function getSectionCaseInsensitive(sections: Record, desired: string): { body: string; found: boolean } { +const EMPTY_SECTION_BODY: SectionBody = { lines: [], fenceMask: [] }; + +function getSectionCaseInsensitive(sections: Record, desired: string): { body: SectionBody; found: boolean } { const target = desired.toLowerCase(); for (const [title, body] of Object.entries(sections)) { if (title.toLowerCase() === target) return { body, found: true }; } - return { body: '', found: false }; + return { body: EMPTY_SECTION_BODY, found: false }; } -function parseRequirementBlocksFromSection(sectionBody: string): RequirementBlock[] { - if (!sectionBody) return []; - const lines = normalizeLineEndings(sectionBody).split('\n'); +function parseRequirementBlocksFromSection(sectionBody: SectionBody): RequirementBlock[] { + const { lines, fenceMask } = sectionBody; + if (lines.length === 0) return []; + const isRequirementHeader = (i: number): boolean => !fenceMask[i] && REQUIREMENT_HEADER_REGEX.test(lines[i]); + const isTopLevelHeader = (i: number): boolean => !fenceMask[i] && /^##\s+/.test(lines[i]); const blocks: RequirementBlock[] = []; let i = 0; while (i < lines.length) { // Seek next requirement header - while (i < lines.length && !REQUIREMENT_HEADER_REGEX.test(lines[i])) i++; + while (i < lines.length && !isRequirementHeader(i)) i++; if (i >= lines.length) break; const headerLine = lines[i]; - const m = headerLine.match(REQUIREMENT_HEADER_REGEX); - if (!m) { i++; continue; } + const m = headerLine.match(REQUIREMENT_HEADER_REGEX)!; const name = normalizeRequirementName(m[1]); const buf: string[] = [headerLine]; i++; - while (i < lines.length && !REQUIREMENT_HEADER_REGEX.test(lines[i]) && !/^##\s+/.test(lines[i])) { + while (i < lines.length && !isRequirementHeader(i) && !isTopLevelHeader(i)) { buf.push(lines[i]); i++; } @@ -193,11 +217,13 @@ function parseRequirementBlocksFromSection(sectionBody: string): RequirementBloc return blocks; } -function parseRemovedNames(sectionBody: string): string[] { - if (!sectionBody) return []; +function parseRemovedNames(sectionBody: SectionBody): string[] { + const { lines, fenceMask } = sectionBody; + if (lines.length === 0) return []; const names: string[] = []; - const lines = normalizeLineEndings(sectionBody).split('\n'); - for (const line of lines) { + for (let i = 0; i < lines.length; i++) { + if (fenceMask[i]) continue; + const line = lines[i]; const m = line.match(REQUIREMENT_HEADER_REGEX); if (m) { names.push(normalizeRequirementName(m[1])); @@ -212,12 +238,14 @@ function parseRemovedNames(sectionBody: string): string[] { return names; } -function parseRenamedPairs(sectionBody: string): Array<{ from: string; to: string }> { - if (!sectionBody) return []; +function parseRenamedPairs(sectionBody: SectionBody): Array<{ from: string; to: string }> { + const { lines, fenceMask } = sectionBody; + if (lines.length === 0) return []; const pairs: Array<{ from: string; to: string }> = []; - const lines = normalizeLineEndings(sectionBody).split('\n'); let current: { from?: string; to?: string } = {}; - for (const line of lines) { + for (let i = 0; i < lines.length; i++) { + if (fenceMask[i]) continue; + const line = lines[i]; const fromMatch = line.match(/^\s*-?\s*FROM:\s*`?###\s*Requirement:\s*(.+?)`?\s*$/); const toMatch = line.match(/^\s*-?\s*TO:\s*`?###\s*Requirement:\s*(.+?)`?\s*$/); if (fromMatch) { diff --git a/src/core/parsers/spec-structure.ts b/src/core/parsers/spec-structure.ts index cfcfe0b1b..17a9be0bd 100644 --- a/src/core/parsers/spec-structure.ts +++ b/src/core/parsers/spec-structure.ts @@ -1,3 +1,5 @@ +import { buildCodeFenceMask } from './code-fence.js'; + const REQUIREMENTS_SECTION_HEADER = /^##\s+Requirements\s*$/i; const TOP_LEVEL_SECTION_HEADER = /^##\s+/; const DELTA_HEADER = /^##\s+(ADDED|MODIFIED|REMOVED|RENAMED)\s+Requirements\s*$/i; @@ -75,43 +77,6 @@ export function findMainSpecStructureIssues(content: string): MainSpecStructureI export function stripFencedCodeBlocksPreservingLines(content: string): string { const lines = content.split('\n'); - const output: string[] = []; - let activeFence: { marker: '`' | '~'; length: number } | null = null; - - for (const line of lines) { - const fenceMatch = line.match(/^\s*(`{3,}|~{3,})(.*)$/); - - if (!activeFence) { - if (fenceMatch) { - activeFence = { - marker: fenceMatch[1][0] as '`' | '~', - length: fenceMatch[1].length, - }; - output.push(''); - } else { - output.push(line); - } - continue; - } - - output.push(''); - - if (isClosingFence(line, activeFence)) { - activeFence = null; - } - } - - return output.join('\n'); -} - -function isClosingFence( - line: string, - activeFence: { marker: '`' | '~'; length: number } -): boolean { - const fenceMatch = line.match(/^\s*(`{3,}|~{3,})\s*$/); - return Boolean( - fenceMatch && - fenceMatch[1][0] === activeFence.marker && - fenceMatch[1].length >= activeFence.length - ); + const mask = buildCodeFenceMask(lines); + return lines.map((line, i) => (mask[i] ? '' : line)).join('\n'); } diff --git a/src/core/validation/validator.ts b/src/core/validation/validator.ts index 47071ed47..39bfdb4af 100644 --- a/src/core/validation/validator.ts +++ b/src/core/validation/validator.ts @@ -11,6 +11,7 @@ import { VALIDATION_MESSAGES } from './constants.js'; import { parseDeltaSpec, normalizeRequirementName } from '../parsers/requirement-blocks.js'; +import { buildCodeFenceMask } from '../parsers/code-fence.js'; import { findMainSpecStructureIssues } from '../parsers/spec-structure.js'; import { FileSystemUtils } from '../../utils/file-system.js'; @@ -414,15 +415,19 @@ export class Validator { private extractRequirementText(blockRaw: string): string | undefined { const lines = blockRaw.split('\n'); - // Skip header line (index 0) - let i = 1; + const fenceMask = buildCodeFenceMask(lines); - // Find the first substantial text line, skipping metadata and blank lines - for (; i < lines.length; i++) { + // Find the first substantial text line, skipping metadata and blank lines. + // Header line is at index 0, so start at 1. + for (let i = 1; i < lines.length; i++) { const line = lines[i]; - // Stop at scenario headers - if (/^####\s+/.test(line)) break; + // Stop at scenario headers (ignoring "####" lines inside fenced code blocks) + if (!fenceMask[i] && /^####\s+/.test(line)) break; + + // Skip content inside fenced code blocks; it is illustrative, not the + // requirement statement. + if (fenceMask[i]) continue; const trimmed = line.trim(); @@ -463,8 +468,13 @@ export class Validator { } private countScenarios(blockRaw: string): number { - const matches = blockRaw.match(/^####\s+/gm); - return matches ? matches.length : 0; + const lines = blockRaw.split('\n'); + const fenceMask = buildCodeFenceMask(lines); + let count = 0; + for (let i = 0; i < lines.length; i++) { + if (!fenceMask[i] && /^####\s+/.test(lines[i])) count++; + } + return count; } private formatSectionList(sections: string[]): string { diff --git a/test/core/parsers/requirement-blocks.test.ts b/test/core/parsers/requirement-blocks.test.ts index 063593939..798d70c59 100644 --- a/test/core/parsers/requirement-blocks.test.ts +++ b/test/core/parsers/requirement-blocks.test.ts @@ -43,4 +43,80 @@ describe('parseDeltaSpec', () => { expect(result.added.length).toBe(1); expect(result.added[0].name).toBe('NoSpace'); }); + + it('ignores requirement headers and delta sections inside fenced code blocks', () => { + const content = [ + '## ADDED Requirements', + '', + '### Requirement: Real requirement', + 'The system SHALL do the thing.', + '', + '#### Scenario: It works', + '- **WHEN** a user acts', + '- **THEN** it succeeds', + '', + 'Authors may document the delta format like this:', + '', + '```markdown', + '## ADDED Requirements', + '### Requirement: Example only', + '#### Scenario: Example scenario', + '```', + '', + ].join('\n'); + + const result = parseDeltaSpec(content); + expect(result.added.map((b) => b.name)).toEqual(['Real requirement']); + // The fenced example stays inside the real requirement block instead of + // becoming a phantom requirement. + expect(result.added[0].raw).toContain('```markdown'); + }); + + it('ignores REMOVED bullets and RENAMED pairs inside fenced code blocks', () => { + const content = [ + '## REMOVED Requirements', + '- `### Requirement: Actually removed`', + '', + '```markdown', + '- `### Requirement: Documented example`', + '```', + '', + '## RENAMED Requirements', + '- FROM: `### Requirement: Old name`', + '- TO: `### Requirement: New name`', + '', + '```markdown', + '- FROM: `### Requirement: Example old`', + '- TO: `### Requirement: Example new`', + '```', + '', + ].join('\n'); + + const result = parseDeltaSpec(content); + expect(result.removed).toEqual(['Actually removed']); + expect(result.renamed).toEqual([{ from: 'Old name', to: 'New name' }]); + }); +}); + +describe('extractRequirementsSection (fenced code blocks)', () => { + it('does not treat requirement headers inside fenced code blocks as real requirements', () => { + const content = [ + '# Spec', + '', + '## Requirements', + '', + '### Requirement: Real requirement', + 'The system SHALL do the thing.', + '', + 'Example of the format authors should follow:', + '', + '```markdown', + '### Requirement: Example only', + '```', + '', + ].join('\n'); + + const result = extractRequirementsSection(content); + expect(result.bodyBlocks.map((b) => b.name)).toEqual(['Real requirement']); + }); }); diff --git a/test/core/validation.test.ts b/test/core/validation.test.ts index 72ebc2aba..c1eab10d6 100644 --- a/test/core/validation.test.ts +++ b/test/core/validation.test.ts @@ -648,6 +648,76 @@ The system SHALL implement this feature. expect(report.summary.errors).toBe(0); }); + it('does not flag requirement headers/scenarios inside fenced code blocks', async () => { + const changeDir = path.join(testDir, 'test-change-fenced-example'); + const specsDir = path.join(changeDir, 'specs', 'test-spec'); + await fs.mkdir(specsDir, { recursive: true }); + + const deltaSpec = `# Test Spec + +## ADDED Requirements + +### Requirement: Documentation Generator +The system SHALL render a delta example in its output. + +#### Scenario: Renders an example +**Given** a template +**When** documentation is generated +**Then** the following snippet is produced: + +\`\`\`markdown +### Requirement: Example only +#### Scenario: Example scenario +\`\`\` +`; + + const specPath = path.join(specsDir, 'spec.md'); + await fs.writeFile(specPath, deltaSpec); + + const validator = new Validator(true); + const report = await validator.validateChangeDeltaSpecs(changeDir); + + // The fenced "### Requirement: Example only" must not be parsed as a + // second (phantom) requirement, which previously produced a spurious + // "missing requirement text" error. + expect(report.valid).toBe(true); + expect(report.summary.errors).toBe(0); + expect(report.issues.some(i => i.message.includes('Example only'))).toBe(false); + }); + + it('does not count scenario headers inside fenced code blocks toward the required scenario count', async () => { + const changeDir = path.join(testDir, 'test-change-fenced-scenario-only'); + const specsDir = path.join(changeDir, 'specs', 'test-spec'); + await fs.mkdir(specsDir, { recursive: true }); + + const deltaSpec = `# Test Spec + +## ADDED Requirements + +### Requirement: Documentation Generator +The system SHALL render a delta example in its output. + +\`\`\`markdown +#### Scenario: Example scenario +\`\`\` +`; + + const specPath = path.join(specsDir, 'spec.md'); + await fs.writeFile(specPath, deltaSpec); + + const validator = new Validator(true); + const report = await validator.validateChangeDeltaSpecs(changeDir); + + // The only "#### Scenario:" lives inside a fenced code block, so it must + // not count toward the scenario requirement; the validator must still + // flag the requirement as missing a scenario. + expect(report.valid).toBe(false); + expect(report.summary.errors).toBeGreaterThan(0); + expect( + report.issues.some(i => i.message.includes('must include at least one scenario')) + ).toBe(true); + }); + it('should treat delta headers case-insensitively', async () => { const changeDir = path.join(testDir, 'test-change-mixed-case'); const specsDir = path.join(changeDir, 'specs', 'test-spec');