From 92f7426e5fa550ede990626e3fb98253e6b22a7a Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 10:23:59 -0400 Subject: [PATCH 01/17] feat(workspace): add loader diff harness + RFC for loading rewrite Builds the safety net before rewriting the component loader: a deterministic component snapshot + a wrapper that runs two loader instances in parallel and logs diffs to JSONL. Off by default, opt-in via BIT_LOADER_DIFF=1. --- docs/rfcs/component-loading-rewrite/README.md | 329 ++++++++++++++++++ .../SNAPSHOT-CONTRACT.md | 59 ++++ docs/rfcs/component-loading-rewrite/TASKS.md | 71 ++++ e2e/harmony/component-loader-contract.e2e.ts | 273 +++++++++++++++ e2e/harmony/loader-diff-harness.e2e.ts | 101 ++++++ .../workspace-component/loader-diff/diff.ts | 84 +++++ .../loader-diff/harness.ts | 172 +++++++++ .../workspace-component/loader-diff/index.ts | 18 + .../loader-diff/snapshot.ts | 45 +++ scopes/workspace/workspace/workspace.ts | 14 +- 10 files changed, 1165 insertions(+), 1 deletion(-) create mode 100644 docs/rfcs/component-loading-rewrite/README.md create mode 100644 docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md create mode 100644 docs/rfcs/component-loading-rewrite/TASKS.md create mode 100644 e2e/harmony/component-loader-contract.e2e.ts create mode 100644 e2e/harmony/loader-diff-harness.e2e.ts create mode 100644 scopes/workspace/workspace/workspace-component/loader-diff/diff.ts create mode 100644 scopes/workspace/workspace/workspace-component/loader-diff/harness.ts create mode 100644 scopes/workspace/workspace/workspace-component/loader-diff/index.ts create mode 100644 scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts diff --git a/docs/rfcs/component-loading-rewrite/README.md b/docs/rfcs/component-loading-rewrite/README.md new file mode 100644 index 000000000000..383228d131f0 --- /dev/null +++ b/docs/rfcs/component-loading-rewrite/README.md @@ -0,0 +1,329 @@ +# RFC: Component Loading Mechanism Rewrite + +## Status: Planning + +## Problem Statement + +The current component loading mechanism (`WorkspaceComponentLoader`) is: + +1. **Hard to understand** - 1,029 lines with deeply nested logic, especially `buildLoadGroups()` +2. **Hard to modify** - Changes risk regressions despite extensive e2e test coverage +3. **Performance bottleneck** - Complex load ordering and multiple cache layers add overhead +4. **Blocking new features** - Architecture makes certain improvements difficult + +### Complexity Hotspots + +| File | Lines | Issue | +| ------------------------------- | ----- | ------------------------------------------------------------------------ | +| `workspace-component-loader.ts` | 1,029 | Main complexity - `buildLoadGroups()` alone is 120+ lines of dense logic | +| `workspace.ts` | 2,554 | Too many responsibilities | +| `scope.main.runtime.ts` | 1,481 | Similar issues on scope side | + +### Root Causes of Complexity + +1. **Multi-source loading** - Components come from workspace filesystem AND scope storage +2. **Circular bootstrap** - Need envs to load components, but envs ARE components +3. **Legacy interop** - Dual representation (ConsumerComponent + Component) +4. **Inline computation** - Load order computed inline, not inspectable +5. **Mixed concerns** - Discovery, resolution, hydration, execution all interleaved + +### The Caching Nightmare + +The current caching strategy is particularly problematic: + +**ComponentLoadOptions has 12 boolean flags** that affect how a component loads, but: + +- Cache key only uses 4 of them: `loadExtensions`, `executeLoadSlot`, `loadDocs`, `loadCompositions` +- The other 8 flags are **ignored** when computing cache keys +- Sometimes cache key uses the given loadOptions, sometimes it's hardcoded to `{ loadExtensions: true, executeLoadSlot: true }` +- Cache lookup tries given loadOptions first, then falls back to the hardcoded options + +This leads to **unpredictable cache behavior** - it takes significant time just to figure out why unexpected data comes from the cache. + +**Multiple overlapping caches exist:** + +| Cache | Location | Purpose | +| ----------------------- | ------------------------ | ------------------------- | +| Components cache | WorkspaceComponentLoader | Loaded Component objects | +| Scope components cache | WorkspaceComponentLoader | Components from scope | +| Extensions cache | WorkspaceComponentLoader | Component extensions | +| ConsumerComponent cache | Legacy consumer | Legacy component objects | +| Dependency cache | Filesystem | Resolved dependencies | +| Tree cache | (previously Madge) | Dependency tree structure | + +These caches have **no unified invalidation strategy** and can get out of sync. + +### Recursive Loading & Hidden Control Flow + +**Components loading components loading components...** + +When loading a component, it often needs data from other components (e.g., its env). Those components +may themselves need other components. This creates recursive loading that's very hard to track: + +``` +Load component A + → Needs env B + → Load env B + → Needs env-of-env C + → Load C + → Has extensions D, E + → Load D, E... +``` + +There's no visibility into this chain. When something goes wrong, it's unclear which load triggered what. + +**Legacy hooks obscure control flow** + +The legacy `ConsumerComponent` load calls hooks that jump into Harmony: + +- `onConfigLoad` - fires during config parsing +- `loadDependencies` - fires to resolve dependencies +- Other lifecycle hooks + +These hooks mean the call stack jumps between legacy code and Harmony aspects unpredictably. +Even with a debugger, it's hard to follow what's happening. + +**Goal for V2:** Even though we keep ConsumerComponent, we can make its creation more linear +and predictable. The recursive loading should be explicit in the LoadPlan, not discovered at runtime. + +## Goals + +- [ ] Make loading logic understandable in a 5-minute walkthrough +- [ ] Enable safe modifications without fear of regressions +- [ ] Improve loading performance (measurable benchmark) +- [ ] Maintain full backward compatibility +- [ ] Keep legacy ConsumerComponent interop (not in scope to remove) + +## Non-Goals + +- Removing ConsumerComponent bridge (deferred) +- Changing the Component public API +- Modifying how aspects/extensions work fundamentally + +--- + +## New Architecture + +### Core Principle: Explicit Pipeline with Inspectable Plan + +Instead of computing load order inline, we create an explicit `LoadPlan` that can be: + +- Inspected for debugging +- Tested independently +- Optimized without changing the pipeline + +### Pipeline Phases + +``` +┌─────────────────┐ +│ Discovery │ Find all ComponentIDs to load +│ (Input: IDs) │ Output: Set +└────────┬────────┘ + ▼ +┌─────────────────┐ +│ Resolution │ Resolve dependencies, determine load order +│ │ Output: LoadPlan (topologically sorted) +└────────┬────────┘ + ▼ +┌─────────────────┐ +│ Hydration │ Load raw data from sources (workspace/scope) +│ │ Output: Map +└────────┬────────┘ + ▼ +┌─────────────────┐ +│ Enrichment │ Add aspects, extensions, env descriptors +│ │ Output: Map +└────────┬────────┘ + ▼ +┌─────────────────┐ +│ Assembly │ Build Component objects +│ │ Output: Map +└────────┬────────┘ + ▼ +┌─────────────────┐ +│ Execution │ Run onComponentLoad slots +│ │ Output: Map (final) +└─────────────────┘ +``` + +### Key Data Structures + +#### LoadPlan + +```typescript +interface LoadPlan { + // Phases in execution order + phases: LoadPhase[]; + + // Dependency graph for debugging/visualization + dependencies: Map; + + // Flat list in topological order + loadOrder: ComponentID[]; + + // Metadata + stats: { + totalComponents: number; + workspaceComponents: number; + scopeComponents: number; + envCount: number; + }; +} + +interface LoadPhase { + name: string; + type: 'core-envs' | 'env-of-envs' | 'extensions' | 'components'; + ids: ComponentID[]; + source: 'workspace' | 'scope' | 'both'; + + // For debugging + reason: string; // Why this phase exists +} +``` + +#### ComponentSource + +Unified interface for loading from different sources: + +```typescript +interface ComponentSource { + name: string; + priority: number; + + // Check if this source can provide the component + canLoad(id: ComponentID): Promise; + + // Load raw component data (before enrichment) + loadRaw(id: ComponentID): Promise; + + // Batch loading for performance + loadRawMany(ids: ComponentID[]): Promise>; +} + +class WorkspaceSource implements ComponentSource { + name = 'workspace'; + priority = 1; // Higher priority than scope + // ... implementation +} + +class ScopeSource implements ComponentSource { + name = 'scope'; + priority = 2; + // ... implementation +} +``` + +### Caching Strategy + +Single coherent cache with clear keys: + +```typescript +interface LoaderCache { + // Primary cache - fully loaded components + components: Map; + + // Raw data cache - before enrichment (can be shared) + rawData: Map; + + // Plan cache - avoid recomputing load plans + plans: Map; + + // Methods + invalidate(id: ComponentID): void; + invalidateAll(): void; + getStats(): CacheStats; +} +``` + +--- + +## Lessons from the First Attempt (PR #10086, abandoned) + +A first cut at this rewrite was made on `refactor/component-loading-v2` (Nov 2025 – Jan 2026) and abandoned. The takeaways shape this plan: + +1. **Recursion was the real problem and was never solved.** Env loading triggers component loading, which triggers env loading. The first attempt worked around this by stubbing out the Enrichment phase (returning `{ envsData: {}, depResolverData: {} }`) and bypassing the dep resolver in the workspace source. The "rewrite" therefore did less work than V1 — that's why tests failed. +2. **No safety net before code.** The plan called for a parallel V1-vs-V2 comparison mode; it was never built. V2 was flipped to the default with no objective evidence it produced equivalent output. +3. **All six phases at once is too much surface.** The new pipeline replaced V1's complexity with a different shape of complexity (a 544-line orchestrator looping over phases that each loop over components), and reproduced V1's hidden caches under new names (`scopeComponentsCache` outside the "unified" `LoaderCache`). + +## Migration Strategy: Diff-Harness First, Incremental Seams + +### Step 1 — Diff harness (this PR) + +Before any V2 code: + +1. Build `serializeComponentForDiff(component): NormalizedSnapshot` — a deterministic, sorted serialization of the fields we care about (see [SNAPSHOT-CONTRACT.md](./SNAPSHOT-CONTRACT.md)). +2. Build a wrapper `LoaderDiffHarness` that delegates to a primary loader and a partner loader, snapshots both results, diffs them, and writes diffs to a JSONL file. Returns the primary's result so commands keep working. +3. Wire it into `Workspace` behind `BIT_LOADER_DIFF=1`. +4. Validate it runs **V1-vs-V1 with zero diffs** on a corpus of representative workspaces. (Two `WorkspaceComponentLoader` instances on the same workspace, each with its own cache.) If V1-vs-V1 produces diffs, either the snapshot is non-deterministic or V1 has cold/hot-cache divergence we need to find before V2 starts. + +The harness is opt-in and side-effect-tolerant: enabling it doubles slot fires, so it's a development tool, not a production feature flag. + +### Step 2 — Recursion root-cause spike (separate PR) + +Investigate and document the env↔component recursion in `DECISIONS.md`. Before designing the new pipeline, decide: + +- Pre-pass: extract a synchronous "extension descriptor" pass that doesn't go through `workspace.get`, and fix V1 to use it. +- Lazy env binding: components carry an env reference, which resolves on first use, not load. +- Detected-cycle fallback: accept recursion, detect cycles explicitly, return a stub on cycle. + +A real answer here is a precondition for any structural rewrite. The first attempt designed a pipeline that couldn't accommodate the recursion and ended up bypassing the work that needed to happen. + +### Step 3 — Incremental seams + +One seam per PR, each green on the diff harness before merging: + +- Extract `LoadPlan` construction as a pure function over bitmap + scope state. V1 still does the loading. The plan becomes inspectable and testable. +- Extract Hydration as a separate concern, V1 still drives ordering. +- Etc. + +No big-bang switchover. No `BIT_FEATURES=component-loader-v2` "this is the default now" flag. Each seam is a small refactor that leaves the loader working at every commit. + +### Step 4 — Cleanup + +Once enough seams are extracted that the original loader has shrunk to an orchestrator, decide whether the residual orchestrator is small/clear enough to keep, or whether to replace it. By that point the answer will be obvious; today it isn't. + +--- + +## File Structure (this PR) + +``` +scopes/workspace/workspace/workspace-component/ +├── workspace-component-loader.ts # Unchanged +└── loader-diff/ + ├── index.ts # Public API + env-flag detection + ├── snapshot.ts # serializeComponentForDiff() + ├── diff.ts # diffSnapshots() + └── harness.ts # LoaderDiffHarness wrapper + +docs/rfcs/component-loading-rewrite/ +├── README.md # This file +├── TASKS.md # Task tracking +├── SNAPSHOT-CONTRACT.md # Fields the snapshot covers +└── DECISIONS.md # Decision log (created in Step 2) +``` + +--- + +## Success Criteria + +### For this PR (Step 1) + +1. `BIT_LOADER_DIFF=1 bit status` runs to completion on a representative workspace. +2. The JSONL diff log is empty (V1-vs-V1 produces zero diffs). +3. The harness is off by default. Turning it off has zero behavioral impact. + +### For the rewrite overall + +1. All existing e2e tests pass with the rewritten loader. +2. The diff harness produces zero diffs on the replay corpus. +3. The loader's complexity is reduced **measurably** — fewer hidden caches, no recursive loading, an inspectable load plan. Avoid line-count targets; they encourage optimizing the wrong metric. + +--- + +## References + +- Current loader: `scopes/workspace/workspace/workspace-component/workspace-component-loader.ts` +- Component factory: `scopes/component/component/component-factory.ts` +- Scope loader: `scopes/scope/scope/scope-component-loader.ts` +- Task tracking: [TASKS.md](./TASKS.md) +- Decision log: [DECISIONS.md](./DECISIONS.md) diff --git a/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md new file mode 100644 index 000000000000..863766978c58 --- /dev/null +++ b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md @@ -0,0 +1,59 @@ +# Component Snapshot Contract + +The diff harness compares loaders by serializing each loaded `Component` to a +`NormalizedSnapshot` and diffing the two snapshots. The set of fields included +in the snapshot **is** the contract: anything in the snapshot must match +between V1 and V2; anything not in the snapshot, V2 is free to change. + +The snapshot is deliberately small at the start. Fields are added one at a time +as we discover what matters. Adding a field is cheap; removing a field once +something depends on it is not — so err on the side of starting minimal. + +## Determinism rules + +- Arrays are sorted by a stable key (usually `id` or `aspectId`) before serialization. +- Object keys are emitted in sorted order (use `JSON.stringify` with a replacer + that sorts keys). +- Versions, hashes, and other identifiers are emitted as strings. +- Timestamp fields are excluded. +- File contents are excluded — too large, and orthogonal to the loader's job. + +## Fields (v0 — minimal) + +| Field | Source | Notes | +| -------------- | ------------------------------------- | ------------------------------------------- | +| `id` | `component.id.toString()` | Full id including scope and version | +| `head` | `component.head?.toString() ?? null` | Scope HEAD hash, or null for new components | +| `tags` | sorted versions from `component.tags` | Tag versions present in scope | +| `extensionIds` | sorted aspect ids from `state.config` | Just the ids first; data added in v1 | + +## Fields (v1 — once v0 is stable on V1-vs-V1) + +| Field | Source | Notes | +| ------------ | ---------------------------------------------------- | ------------------------------------------- | +| `extensions` | `state.config.extensions`, sorted by id, with `data` | Full extension payloads, JSON-stable | +| `envId` | env descriptor id | Resolved env, e.g. `teambit.harmony/aspect` | +| `envType` | env descriptor type | | +| `aspects` | `state.aspects` entries, sorted by id, with `config` | Post-`onComponentLoad` slot state | + +## Fields (v2 — once v1 is stable) + +| Field | Source | Notes | +| -------------- | ------------------------------------- | ------------------------------------------------ | +| `dependencies` | from `dep-resolver` data on component | Sorted by package name; capture version + scope | +| `isModified` | `component.isModified()` | The single most failure-prone consumer of loader | + +## Out of scope (intentionally) + +- File contents, file paths, `state.filesystem` — orthogonal to loader correctness. +- Slot subscriber side effects (writes to other state) — covered by e2e tests, not by the snapshot. +- Cache hit/miss counts — observable indirectly via performance, not via output equivalence. +- Component object identity — V2 may construct different `Component` instances. We compare values, not references. + +## How to extend + +When you find a behavior that the harness misses (something diverges between +V1 and the rewritten loader and the harness reports zero diffs), add the +relevant field here, update `snapshot.ts`, and rerun the V1-vs-V1 baseline. If +V1-vs-V1 isn't zero on the new field, the field needs a normalization rule +before it can join the contract. diff --git a/docs/rfcs/component-loading-rewrite/TASKS.md b/docs/rfcs/component-loading-rewrite/TASKS.md new file mode 100644 index 000000000000..5dfed2c0d247 --- /dev/null +++ b/docs/rfcs/component-loading-rewrite/TASKS.md @@ -0,0 +1,71 @@ +# Component Loading Rewrite — Task Tracking + +**Legend:** `[ ]` not started · `[~]` in progress · `[x]` done · `[!]` blocked + +This is a fresh start after PR #10086 was abandoned. See README.md for why +the previous approach was discarded. + +--- + +## Step 1 — Diff Harness (this PR) + +> Build the safety net before any V2 code. + +### Snapshot contract + +- [x] Define `NormalizedSnapshot` (v0 fields: id, head, tags, extensionIds) +- [x] Document the contract (`SNAPSHOT-CONTRACT.md`) +- [ ] Extend to v1 fields (extensions with data, envId, envType, aspects post-slot) +- [ ] Extend to v2 fields (dependencies, isModified) + +### Harness scaffolding + +- [x] `serializeComponentForDiff` (`snapshot.ts`) +- [x] `diffSnapshots`, `diffResultSets` (`diff.ts`) +- [x] `LoaderDiffHarness` wrapper (`harness.ts`) +- [x] `BIT_LOADER_DIFF=1` env-flag detection (`index.ts`) +- [x] Wire into `Workspace` constructor + +### V1-vs-V1 baseline + +- [ ] Run `BIT_LOADER_DIFF=1 bit status` on this workspace; verify zero diffs +- [ ] Run `BIT_LOADER_DIFF=1 bit list` on this workspace; verify zero diffs +- [ ] Run `BIT_LOADER_DIFF=1 bit show ` on a few components; verify zero diffs +- [ ] Run on a workspace with custom envs; verify zero diffs +- [ ] Run on a workspace mid-lane; verify zero diffs +- [ ] Document the replay corpus in `docs/rfcs/component-loading-rewrite/REPLAY-CORPUS.md` + +### Contract tests + +- [x] Cherry-pick `e2e/harmony/component-loader-contract.e2e.ts` from PR #10086 + +--- + +## Step 2 — Recursion Root-Cause Spike (separate PR) + +> Understand and decide before designing the new pipeline. + +- [ ] Trace the env↔component recursion in V1; document the call chain +- [ ] Evaluate options: pre-pass, lazy env binding, cycle detection +- [ ] Write up findings in `DECISIONS.md` +- [ ] Get alignment on the approach before Step 3 + +--- + +## Step 3 — Incremental Seams (one PR each) + +> Each seam is green on the diff harness before merging. No big-bang switchover. + +- [ ] Extract `LoadPlan` construction as a pure function (V1 still drives loading) +- [ ] Extract Hydration as a separate concern +- [ ] Extract Enrichment as a separate concern (depends on Step 2 outcome) +- [ ] Extract Assembly +- [ ] Extract Execution + +--- + +## Step 4 — Cleanup + +- [ ] Decide whether to rewrite the orchestrator or keep the residual +- [ ] Remove the diff harness (or keep as a permanent debug tool, TBD) +- [ ] Update RFC with final architecture diff --git a/e2e/harmony/component-loader-contract.e2e.ts b/e2e/harmony/component-loader-contract.e2e.ts new file mode 100644 index 000000000000..b04ca4903f7c --- /dev/null +++ b/e2e/harmony/component-loader-contract.e2e.ts @@ -0,0 +1,273 @@ +/** + * Component Loader Contract Tests + * + * These tests define the expected behaviors of the component loading mechanism. + * They serve as a safety net for the V2 loader rewrite - any new implementation + * must pass these tests to ensure behavioral compatibility. + * + * The tests focus on OBSERVABLE OUTCOMES, not implementation details. + */ +import { expect } from 'chai'; +import { Helper } from '@teambit/legacy.e2e-helper'; + +describe('component loader contract tests', function () { + this.timeout(0); + let helper: Helper; + + before(() => { + helper = new Helper(); + }); + + after(() => { + helper.scopeHelper.destroy(); + }); + + describe('loading workspace-only components', () => { + before(() => { + helper.scopeHelper.reInitWorkspace(); + helper.fixtures.populateComponents(2); + }); + + it('should load a new component that exists only in workspace (not tagged)', () => { + const show = helper.command.showComponent('comp1'); + expect(show).to.include('comp1'); + }); + + it('should list all workspace components', () => { + const list = helper.command.listParsed(); + expect(list).to.have.lengthOf(2); + }); + + it('should show component status as new', () => { + const status = helper.command.statusJson(); + expect(status.newComponents).to.have.lengthOf(2); + }); + }); + + describe('loading scope-only components', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.populateComponents(1); + helper.command.tagAllWithoutBuild(); + helper.command.export(); + // Re-init workspace without the component files + helper.scopeHelper.reInitWorkspace(); + helper.scopeHelper.addRemoteScope(); + }); + + it('should show remote component without importing', () => { + const show = helper.command.showComponent(`${helper.scopes.remote}/comp1 --remote`); + expect(show).to.include('comp1'); + }); + + it('should not import objects when just showing', () => { + const objects = helper.command.catScope(); + expect(objects).to.have.lengthOf(0); + }); + }); + + describe('loading components with workspace + scope data (merged)', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.populateComponents(1); + helper.command.tagAllWithoutBuild(); + helper.command.export(); + // Modify the component locally + helper.fs.appendFile('comp1/index.js', '\n// modified'); + }); + + it('should show component as modified', () => { + const status = helper.command.statusJson(); + expect(status.modifiedComponents).to.have.lengthOf(1); + }); + + it('should load component with local modifications merged with scope data', () => { + const show = helper.command.showComponent('comp1'); + // Component should have version from scope but show as modified + expect(show).to.include('comp1'); + }); + }); + + describe('loading out-of-sync components', () => { + describe('bitmap has no version but scope has tagged version', () => { + let scopeOutOfSync: string; + + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.createComponentBarFoo(); + helper.fixtures.addComponentBarFoo(); + const bitMap = helper.bitMap.read(); + helper.fixtures.tagComponentBarFoo(); + // Revert bitmap to pre-tag state (simulating out-of-sync) + helper.bitMap.write(bitMap); + scopeOutOfSync = helper.scopeHelper.cloneWorkspace(); + }); + + it('should sync bitmap to match scope on status', () => { + helper.scopeHelper.getClonedWorkspace(scopeOutOfSync); + helper.command.status(); + const bitMap = helper.bitMap.read(); + expect(bitMap['bar/foo'].version).to.equal('0.0.1'); + }); + }); + + describe('bitmap shows exported but scope shows only tagged', () => { + let scopeOutOfSync: string; + + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.createComponentBarFoo(); + helper.fixtures.addComponentBarFoo(); + helper.fixtures.tagComponentBarFoo(); + const bitMapBeforeExport = helper.bitMap.read(); + helper.command.export(); + // Revert bitmap to pre-export state + helper.bitMap.write(bitMapBeforeExport); + scopeOutOfSync = helper.scopeHelper.cloneWorkspace(); + }); + + it('should sync bitmap to match scope (exported state)', () => { + helper.scopeHelper.getClonedWorkspace(scopeOutOfSync); + helper.command.status(); + const bitMap = helper.bitMap.read(); + expect(bitMap['bar/foo'].scope).to.equal(helper.scopes.remote); + }); + }); + }); + + describe('loading components with extensions', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.populateComponents(1); + }); + + it('should load component with its configured extensions', () => { + const show = helper.command.showComponent('comp1'); + // All components have at least the env extension - check for "aspects" in output + expect(show).to.include('aspects'); + }); + }); + + describe('loading components with custom env', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.populateComponents(1); + helper.command.setEnv('comp1', 'teambit.harmony/aspect'); + }); + + it('should load component with the correct env', () => { + const envId = helper.env.getComponentEnv('comp1'); + expect(envId).to.include('teambit.harmony/aspect'); + }); + + it('should show the env in component details', () => { + const show = helper.command.showComponent('comp1'); + expect(show).to.include('env'); + }); + }); + + describe('loading multiple components with shared dependencies', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + // Create comp2 first + helper.fs.outputFile('comp2/index.js', 'module.exports = {}'); + helper.command.addComponent('comp2'); + // Create comp1 that depends on comp2 using the correct scope name + helper.fs.outputFile('comp1/index.js', `const comp2 = require('@${helper.scopes.remote}/comp2');`); + helper.command.addComponent('comp1'); + }); + + it('should load both components correctly', () => { + const list = helper.command.listParsed(); + expect(list).to.have.lengthOf(2); + }); + + it('should detect dependency relationship', () => { + const deps = helper.command.getCompDepsIdsFromData('comp1'); + const hasComp2Dep = deps.some((depId: string) => depId.includes('comp2')); + expect(hasComp2Dep).to.be.true; + }); + }); + + describe('caching behavior', () => { + before(() => { + helper.scopeHelper.reInitWorkspace(); + helper.fixtures.populateComponents(1); + }); + + it('should return same component data when loaded twice', () => { + const show1 = helper.command.showComponent('comp1'); + const show2 = helper.command.showComponent('comp1'); + expect(show1).to.equal(show2); + }); + + it('should reflect file changes after modification', () => { + const statusBefore = helper.command.statusJson(); + expect(statusBefore.newComponents).to.have.lengthOf(1); + + helper.fs.appendFile('comp1/index.js', '\n// modified'); + + // Status should still show component (cache should not prevent seeing changes) + const statusAfter = helper.command.statusJson(); + expect(statusAfter.newComponents).to.have.lengthOf(1); + }); + }); + + describe('error handling', () => { + before(() => { + helper.scopeHelper.reInitWorkspace(); + }); + + it('should throw meaningful error for non-existent component', () => { + let error: Error | null = null; + try { + helper.command.showComponent('non-existent-component'); + } catch (e: any) { + error = e; + } + expect(error).to.not.be.null; + }); + }); + + describe('loading tagged but not exported components', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.populateComponents(1); + helper.command.tagAllWithoutBuild(); + }); + + it('should show component as staged', () => { + const status = helper.command.statusJson(); + expect(status.stagedComponents).to.have.lengthOf(1); + }); + + it('should load component with version from tag', () => { + const show = helper.command.showComponent('comp1'); + expect(show).to.include('0.0.1'); + }); + }); + + describe('loading components after import', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.populateComponents(1); + helper.command.tagAllWithoutBuild(); + helper.command.export(); + + // Create a new workspace and import the component + helper.scopeHelper.reInitWorkspace(); + helper.scopeHelper.addRemoteScope(); + helper.command.importComponent('comp1'); + }); + + it('should load imported component', () => { + const list = helper.command.listParsed(); + expect(list).to.have.lengthOf(1); + }); + + it('should show component as not modified', () => { + const status = helper.command.statusJson(); + expect(status.modifiedComponents).to.have.lengthOf(0); + }); + }); +}); diff --git a/e2e/harmony/loader-diff-harness.e2e.ts b/e2e/harmony/loader-diff-harness.e2e.ts new file mode 100644 index 000000000000..110716b8e3c3 --- /dev/null +++ b/e2e/harmony/loader-diff-harness.e2e.ts @@ -0,0 +1,101 @@ +/** + * Diff harness validation tests. + * + * The diff harness (BIT_LOADER_DIFF=1) runs two loader instances in parallel + * and writes diffs to a JSONL log. In V1-vs-V1 mode, the log should contain + * only header lines — any data lines mean either the snapshot is non-deterministic + * or V1 has cold/hot-cache divergence we need to find before V2 is built. + */ +import fs from 'fs'; +import os from 'os'; +import path from 'path'; +import { expect } from 'chai'; +import { Helper } from '@teambit/legacy.e2e-helper'; + +interface DiffLogLine { + header?: boolean; + callId?: number; + operation?: string; + ids?: string[]; + diff?: unknown; + partnerError?: string; +} + +function readDiffLog(logPath: string): DiffLogLine[] { + if (!fs.existsSync(logPath)) return []; + const content = fs.readFileSync(logPath, 'utf-8').trim(); + if (!content) return []; + return content.split('\n').map((line) => JSON.parse(line)); +} + +function dataLines(lines: DiffLogLine[]): DiffLogLine[] { + return lines.filter((l) => !l.header); +} + +describe('loader diff harness — V1-vs-V1 baseline', function () { + this.timeout(0); + let helper: Helper; + let logPath: string; + + before(() => { + helper = new Helper(); + }); + + after(() => { + helper.scopeHelper.destroy(); + }); + + beforeEach(() => { + // Unique path per test so parallel runs don't collide. + logPath = path.join(os.tmpdir(), `bit-loader-diff-${process.pid}-${Date.now()}.jsonl`); + if (fs.existsSync(logPath)) fs.unlinkSync(logPath); + }); + + describe('workspace with two new components', () => { + before(() => { + helper.scopeHelper.reInitWorkspace(); + helper.fixtures.populateComponents(2); + }); + + it('bit status writes a harness header and produces zero diffs', () => { + helper.command.runCmd(`bit status`, helper.scopes.localPath, 'pipe', undefined, false, { + BIT_LOADER_DIFF: '1', + BIT_LOADER_DIFF_OUT: logPath, + }); + const lines = readDiffLog(logPath); + expect( + lines.filter((l) => l.header), + 'expected at least one header line' + ).to.have.length.greaterThan(0); + expect( + dataLines(lines), + `expected zero diffs but got: ${JSON.stringify(dataLines(lines), null, 2)}` + ).to.have.lengthOf(0); + }); + }); + + // TODO: re-enable once the harness's memory footprint is acceptable on workspaces + // with scope state. Today, running two WorkspaceComponentLoader instances in parallel + // doubles the cache footprint and can OOM Node's default 4GB heap even on tiny + // workspaces. Likely needs a sampling mode or a lighter-weight partner. + describe.skip('workspace with tagged + modified component', () => { + before(() => { + helper.scopeHelper.setWorkspaceWithRemoteScope(); + helper.fixtures.populateComponents(1); + helper.command.tagAllWithoutBuild(); + helper.fs.appendFile('comp1/index.js', '\n// modified after tag'); + }); + + it('bit status on a modified component produces zero diffs', () => { + helper.command.runCmd(`bit status`, helper.scopes.localPath, 'pipe', undefined, false, { + BIT_LOADER_DIFF: '1', + BIT_LOADER_DIFF_OUT: logPath, + }); + const lines = readDiffLog(logPath); + expect( + dataLines(lines), + `expected zero diffs but got: ${JSON.stringify(dataLines(lines), null, 2)}` + ).to.have.lengthOf(0); + }); + }); +}); diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/diff.ts b/scopes/workspace/workspace/workspace-component/loader-diff/diff.ts new file mode 100644 index 000000000000..cb69c5d6867d --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/loader-diff/diff.ts @@ -0,0 +1,84 @@ +import type { NormalizedSnapshot } from './snapshot'; + +export interface FieldDiff { + field: keyof NormalizedSnapshot | `${keyof NormalizedSnapshot}[${number}]`; + primary: unknown; + partner: unknown; +} + +export interface SnapshotDiff { + id: string; + fields: FieldDiff[]; +} + +/** + * Diff a single pair of snapshots. Returns null if identical. + * + * Both snapshots are produced by serializeComponentForDiff and are already + * normalized (sorted arrays, stable key order), so a structural compare is + * sufficient. + */ +export function diffSnapshots(primary: NormalizedSnapshot, partner: NormalizedSnapshot): SnapshotDiff | null { + const fields: FieldDiff[] = []; + + if (primary.id !== partner.id) { + fields.push({ field: 'id', primary: primary.id, partner: partner.id }); + } + if (primary.head !== partner.head) { + fields.push({ field: 'head', primary: primary.head, partner: partner.head }); + } + diffStringArray('tags', primary.tags, partner.tags, fields); + diffStringArray('extensionIds', primary.extensionIds, partner.extensionIds, fields); + + if (fields.length === 0) return null; + return { id: primary.id, fields }; +} + +/** + * Diff a result set: lists of snapshots from primary and partner. + * + * Components present on one side but not the other are reported as a + * "missing" diff with the full snapshot in the present-side field. + */ +export interface ResultDiff { + missingFromPartner: NormalizedSnapshot[]; + missingFromPrimary: NormalizedSnapshot[]; + componentDiffs: SnapshotDiff[]; +} + +export function diffResultSets(primary: NormalizedSnapshot[], partner: NormalizedSnapshot[]): ResultDiff { + const primaryById = new Map(primary.map((s) => [s.id, s])); + const partnerById = new Map(partner.map((s) => [s.id, s])); + + const missingFromPartner: NormalizedSnapshot[] = []; + const missingFromPrimary: NormalizedSnapshot[] = []; + const componentDiffs: SnapshotDiff[] = []; + + for (const [id, primarySnap] of primaryById) { + const partnerSnap = partnerById.get(id); + if (!partnerSnap) { + missingFromPartner.push(primarySnap); + continue; + } + const diff = diffSnapshots(primarySnap, partnerSnap); + if (diff) componentDiffs.push(diff); + } + + for (const [id, partnerSnap] of partnerById) { + if (!primaryById.has(id)) missingFromPrimary.push(partnerSnap); + } + + return { missingFromPartner, missingFromPrimary, componentDiffs }; +} + +export function isResultDiffEmpty(diff: ResultDiff): boolean { + return ( + diff.missingFromPartner.length === 0 && diff.missingFromPrimary.length === 0 && diff.componentDiffs.length === 0 + ); +} + +function diffStringArray(field: keyof NormalizedSnapshot, a: string[], b: string[], out: FieldDiff[]): void { + if (a.length !== b.length || a.some((v, i) => v !== b[i])) { + out.push({ field, primary: a, partner: b }); + } +} diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts b/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts new file mode 100644 index 000000000000..27c22b407a75 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts @@ -0,0 +1,172 @@ +import fs from 'fs'; +import path from 'path'; +import os from 'os'; +import type { Component, InvalidComponent } from '@teambit/component'; +import type { ComponentID } from '@teambit/component-id'; +import type { ConsumerComponent } from '@teambit/legacy.consumer-component'; +import type { Logger } from '@teambit/logger'; +import type { ComponentLoadOptions, WorkspaceComponentLoader } from '../workspace-component-loader'; +import { diffResultSets, isResultDiffEmpty } from './diff'; +import type { ResultDiff } from './diff'; +import { serializeComponentForDiff } from './snapshot'; + +type GetManyRes = { + components: Component[]; + invalidComponents: InvalidComponent[]; +}; + +export type LoaderFactory = () => WorkspaceComponentLoader; + +export interface LoaderDiffHarnessOptions { + /** Where to write the JSONL diff log. Defaults to `/bit-loader-diff.jsonl`. */ + outputPath?: string; + /** Tag written into each log line, e.g. "v1-vs-v1" or "v1-vs-v2". */ + comparisonLabel: string; +} + +/** + * Wraps a primary loader and a partner loader, runs both on every call, + * and writes any diffs to a JSONL log. The primary's result is returned — + * the partner is observation-only. + * + * The partner is constructed lazily on first use and reused across calls, + * so its cache state mirrors how it would behave in real usage. Slot side + * effects fire twice when the harness is enabled — this is a development + * tool, not a production wrapper. + */ +export class LoaderDiffHarness { + private partner: WorkspaceComponentLoader | null = null; + private readonly outputPath: string; + private readonly comparisonLabel: string; + private callIndex = 0; + private writeFailureLogged = false; + + constructor( + private primary: WorkspaceComponentLoader, + private partnerFactory: LoaderFactory, + private logger: Logger, + options: LoaderDiffHarnessOptions + ) { + this.outputPath = options.outputPath ?? path.join(os.tmpdir(), 'bit-loader-diff.jsonl'); + this.comparisonLabel = options.comparisonLabel; + this.writeHeader(); + } + + async getMany(ids: ComponentID[], loadOpts?: ComponentLoadOptions, throwOnFailure = true): Promise { + const primaryResult = await this.primary.getMany(ids, loadOpts, throwOnFailure); + await this.observe('getMany', ids, primaryResult.components, () => + this.getPartner() + .getMany(ids, loadOpts, false) + .then((r) => r.components) + ); + return primaryResult; + } + + async get( + componentId: ComponentID, + legacyComponent?: ConsumerComponent, + useCache?: boolean, + storeInCache?: boolean, + loadOpts?: ComponentLoadOptions + ): Promise { + const primaryResult = await this.primary.get(componentId, legacyComponent, useCache, storeInCache, loadOpts); + await this.observe('get', [componentId], [primaryResult], async () => { + // Don't pass legacyComponent — partner should resolve fresh; passing the same + // ConsumerComponent would couple the two loaders and hide divergence. + const partnerResult = await this.getPartner().get(componentId, undefined, useCache, storeInCache, loadOpts); + return [partnerResult]; + }); + return primaryResult; + } + + async getIfExist(componentId: ComponentID): Promise { + const primaryResult = await this.primary.getIfExist(componentId); + await this.observe('getIfExist', [componentId], primaryResult ? [primaryResult] : [], async () => { + const partnerResult = await this.getPartner().getIfExist(componentId); + return partnerResult ? [partnerResult] : []; + }); + return primaryResult; + } + + async getInvalid(ids: ComponentID[]): Promise { + return this.primary.getInvalid(ids); + } + + clearCache(): void { + this.primary.clearCache(); + this.partner?.clearCache(); + } + + clearComponentCache(id: ComponentID): void { + this.primary.clearComponentCache(id); + this.partner?.clearComponentCache(id); + } + + private getPartner(): WorkspaceComponentLoader { + if (!this.partner) { + this.partner = this.partnerFactory(); + } + return this.partner; + } + + private async observe( + operation: string, + ids: ComponentID[], + primaryComponents: Component[], + runPartner: () => Promise + ): Promise { + const callId = this.callIndex++; + let partnerComponents: Component[]; + try { + partnerComponents = await runPartner(); + } catch (err: any) { + this.writeLine({ + callId, + operation, + ids: ids.map((i) => i.toString()), + partnerError: err?.message ?? String(err), + }); + return; + } + + const primarySnapshots = primaryComponents.map(serializeComponentForDiff); + const partnerSnapshots = partnerComponents.map(serializeComponentForDiff); + const diff = diffResultSets(primarySnapshots, partnerSnapshots); + if (isResultDiffEmpty(diff)) return; + + this.writeLine({ + callId, + operation, + ids: ids.map((i) => i.toString()), + diff: summarizeDiff(diff), + }); + } + + private writeHeader(): void { + this.writeLine({ + header: true, + comparisonLabel: this.comparisonLabel, + startedAt: new Date().toISOString(), + pid: process.pid, + cwd: process.cwd(), + }); + } + + private writeLine(record: object): void { + try { + fs.appendFileSync(this.outputPath, `${JSON.stringify(record)}\n`); + } catch (err: any) { + if (this.writeFailureLogged) return; + this.writeFailureLogged = true; + this.logger.warn(`[loader-diff] failed to write to ${this.outputPath}: ${err?.message ?? err}`); + } + } +} + +function summarizeDiff(diff: ResultDiff): object { + return { + missingFromPartner: diff.missingFromPartner.map((s) => s.id), + missingFromPrimary: diff.missingFromPrimary.map((s) => s.id), + componentDiffs: diff.componentDiffs, + }; +} diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/index.ts b/scopes/workspace/workspace/workspace-component/loader-diff/index.ts new file mode 100644 index 000000000000..015292aa2f1f --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/loader-diff/index.ts @@ -0,0 +1,18 @@ +export { serializeComponentForDiff } from './snapshot'; +export type { NormalizedSnapshot } from './snapshot'; +export { diffSnapshots, diffResultSets, isResultDiffEmpty } from './diff'; +export type { FieldDiff, SnapshotDiff, ResultDiff } from './diff'; +export { LoaderDiffHarness } from './harness'; +export type { LoaderFactory, LoaderDiffHarnessOptions } from './harness'; + +/** + * Returns the comparison label if the diff harness is enabled, otherwise null. + * `BIT_LOADER_DIFF=1` (or `=v1-vs-v1`) → V1-vs-V1 baseline mode. + * Any other truthy value is used as the label as-is. + */ +export function loaderDiffMode(): string | null { + const raw = process.env.BIT_LOADER_DIFF; + if (!raw || raw === '0' || raw.toLowerCase() === 'false') return null; + if (raw === '1' || raw.toLowerCase() === 'true') return 'v1-vs-v1'; + return raw; +} diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts b/scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts new file mode 100644 index 000000000000..c96b5a0db964 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts @@ -0,0 +1,45 @@ +import type { Component } from '@teambit/component'; + +/** + * Deterministic, sorted serialization of a Component for diffing. + * The set of fields in this snapshot IS the contract — see + * docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md. + * + * Anything in the snapshot must match between two loader implementations. + * Anything not in the snapshot, the new loader is free to change. + * + * Start minimal. Add fields as we discover what matters. + */ +export interface NormalizedSnapshot { + id: string; + head: string | null; + tags: string[]; + extensionIds: string[]; +} + +export function serializeComponentForDiff(component: Component): NormalizedSnapshot { + return { + id: component.id.toString(), + head: component.head?.hash ?? null, + tags: collectTags(component), + extensionIds: collectExtensionIds(component), + }; +} + +function collectTags(component: Component): string[] { + const versions: string[] = []; + for (const tag of component.tags.values()) { + versions.push(tag.version.raw); + } + return versions.sort(); +} + +function collectExtensionIds(component: Component): string[] { + const extensions = component.state?.config?.extensions; + if (!extensions) return []; + const ids: string[] = []; + for (const entry of extensions) { + if (entry.stringId) ids.push(entry.stringId); + } + return ids.sort(); +} diff --git a/scopes/workspace/workspace/workspace.ts b/scopes/workspace/workspace/workspace.ts index 3a7752f30ddd..d119067470ad 100644 --- a/scopes/workspace/workspace/workspace.ts +++ b/scopes/workspace/workspace/workspace.ts @@ -112,6 +112,7 @@ import { CompFiles } from './workspace-component/comp-files'; import { Filter } from './filter'; import type { ComponentStatusLegacy, ComponentStatusResult } from './workspace-component/component-status-loader'; import { ComponentStatusLoader } from './workspace-component/component-status-loader'; +import { LoaderDiffHarness, loaderDiffMode } from './workspace-component/loader-diff'; import execa from 'execa'; import { getGitExecutablePath } from '@teambit/git.modules.git-executable'; import { VERSION_ZERO } from '@teambit/objects'; @@ -265,7 +266,18 @@ export class Workspace implements ComponentFactory { private configStore: ConfigStoreMain ) { this.componentLoadedSelfAsAspects = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); - this.componentLoader = new WorkspaceComponentLoader(this, logger, dependencyResolver, envs, aspectLoader); + const primaryLoader = new WorkspaceComponentLoader(this, logger, dependencyResolver, envs, aspectLoader); + const diffMode = loaderDiffMode(); + if (diffMode) { + this.componentLoader = new LoaderDiffHarness( + primaryLoader, + () => new WorkspaceComponentLoader(this, logger, dependencyResolver, envs, aspectLoader), + logger, + { comparisonLabel: diffMode, outputPath: process.env.BIT_LOADER_DIFF_OUT } + ) as unknown as WorkspaceComponentLoader; + } else { + this.componentLoader = primaryLoader; + } this.validateConfig(); this.bitMap = new BitMap(this.consumer.bitMap, this.consumer); this.aspectsMerger = new AspectsMerger(this, this.harmony); From 25b62c33fe81e6499d5e87d07dfa129e205d7c65 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 10:30:42 -0400 Subject: [PATCH 02/17] docs(loader-rewrite): add DECISIONS.md with recursion spike findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The env↔component "recursion" is a DAG topological-ordering problem, not a true cycle. PR #10086 failed because it collapsed V1's two-pass load into a single inline pipeline and lost the ordering guarantee. Recorded in D-001 with file:line evidence. --- .../component-loading-rewrite/DECISIONS.md | 159 ++++++++++++++++++ docs/rfcs/component-loading-rewrite/TASKS.md | 25 ++- 2 files changed, 177 insertions(+), 7 deletions(-) create mode 100644 docs/rfcs/component-loading-rewrite/DECISIONS.md diff --git a/docs/rfcs/component-loading-rewrite/DECISIONS.md b/docs/rfcs/component-loading-rewrite/DECISIONS.md new file mode 100644 index 000000000000..324207010083 --- /dev/null +++ b/docs/rfcs/component-loading-rewrite/DECISIONS.md @@ -0,0 +1,159 @@ +# Decision Log + +Decisions feeding the component-loading rewrite. Each entry: what was decided, +why, and what evidence shaped it. New decisions go at the top. + +--- + +## D-001: The env↔component recursion is a topological-ordering problem, not a cycle + +**Date:** 2026-05-07 +**Status:** Accepted + +### Context + +PR #10086's V2 loader stubbed out the Enrichment phase (returning empty +`envsData` and `depResolverData`) and bypassed the dependency resolver because +calling them inline triggered recursive `workspace.get()` calls. We need to +understand the actual cycle before designing the new pipeline — otherwise any +rewrite ends up with the same workaround. + +### Investigation: what the cycle actually is + +There are three distinct call paths that look like recursion. Only one is a +real DAG-ordering problem; the other two are gated explicitly in V1. + +**Path 1 — Env loading (the real one).** When the dep resolver computes a +component's env policy, it needs the env _Component object_ to read its +manifest: + +``` +WorkspaceComponentLoader.loadOne (loader.ts:790) + → DependencyResolverMain.getEnvPolicyFromFile (dep-resolver.ts:1221) + → EnvsMain.getEnvComponentByEnvId (environments.ts:490) + → host.get(envId) (environments.ts:493) + → WorkspaceComponentLoader.get (loader.ts:704) + → loadOne → cycle +``` + +This is a **DAG**, not a true cycle, _unless_ env A depends on env A (which is +disallowed). Component → env → env-of-env → core-env terminates at the core +envs. The "infinite loop" symptom appears only when the loader doesn't +guarantee envs are loaded before their dependents. + +**Path 2 — `componentExtensions` with `loadExtensions: true`.** Calling +`Workspace.componentExtensions` (`workspace.ts:1647`) with `loadExtensions: +true` invokes `loadComponentsExtensions` → `loadAspects` → +`importAndGetAspects` → `workspace.importAndGetMany` → `workspace.get`. This +is a real cycle that V1 tames by: + +- Always passing `loadExtensions: false` from the bulk loader + (`loader.ts:127`, `:517`, `:576`, `:805`). +- Setting `idsToNotLoadAsAspects` in `importAndGetAspects` so the loader + short-circuits when asked to load the seeders as aspects + (`workspace-aspects-loader.ts:771-776`, with explicit comment: _"once you + try to load the seeder it will try to load the workspace component that + will arrive here again and again"_). +- Memoizing `componentLoadedSelfAsAspects` so each component is "self-loaded + as an aspect" at most once (`loader.ts:457-480`). + +**Path 3 — `warnAboutMisconfiguredEnv`.** Calls `this.get(parsedEnvId)` from +inside `componentExtensions` (`workspace.ts:1677`). Gated behind +`loadExtensions: true` and only fires post-load. + +### Investigation: how V1 actually terminates Path 1 + +V1 uses **topological pre-ordering** in `buildLoadGroups` (`loader.ts:218-341`): + +1. Core envs first (`teambit.harmony` envs that don't need loading). +2. Workspace-component envs, layered: + `regroupEnvsIdsFromTheList` (`loader.ts:358-378`) splits the env list into + "envs that are envs of other envs in this list" and "everything else", and + loads the first group before the second. +3. Extensions, also layered. +4. Regular components last. + +The trick is that by the time a regular component needs its env, the env is +already in `componentsCache` and `getEnvComponentByEnvId` returns from cache — +no recursion. + +V1's blind spot is documented in code: `regroupEnvsIdsFromTheList` only +handles **one level** of env-of-env (the comment at `loader.ts:353` says _"At +the moment this function is not recursive, in the future we might want to +make it recursive"_). For deeper chains (env-of-env-of-env), V1 falls back on +hardcoded special cases for the bit core repo (`loader.ts:311-323`). + +### Decision + +Treat env loading as a topological-ordering problem, not as something to +"break" with a cycle detector or a stub. Specifically: + +1. **Keep V1's two-pass shape.** Bulk-load components with + `loadExtensions: false, executeLoadSlot: false`, then run + `loadComponentsExtensions` and slot execution in a second pass. This is + what makes the cycle in Path 2 well-behaved; the V2 attempt collapsed + these into one "Enrichment phase" and lost the ordering guarantee. + +2. **Make env-DAG resolution properly recursive.** Replace + `regroupEnvsIdsFromTheList`'s one-level grouping with a real topological + sort over the env-of-env DAG. The hardcoded `core-aspect-env` workaround + at `loader.ts:311-323` should fall out of this — if the sort is correct, + no special-case is needed. + +3. **Avoid `getEnvComponentByEnvId` during the inline enrichment of regular + components.** What enrichment actually needs from the env is the env's + _descriptor_ (`envId`, `type`, `services`), not the full Component. In V1 + this is already partially split: `EnvsMain.calculateEnv` + (`environments.ts:657`) is **synchronous** and reads only the component's + own aspect data. Use that for the descriptor, and only fall back to + `getEnvComponentByEnvId` for operations that genuinely need the env's + files (env manifest, env policy from file). + +4. **Don't introduce a "lazy env binding" thunk on Component.** It would + change the public API in ways that ripple across consumers expecting + `envs.getEnv(component)` to work synchronously after load. The recursion + isn't bad enough to justify it. + +### What this rules out + +- **A single-pass pipeline with inline enrichment** (the V2 attempt's + approach). Cannot work without re-creating V1's mitigations, at which + point the "single pass" claim is false. + +- **A cycle-detection fallback.** The recursion is a DAG; a cycle detector + would either be triggered by the legitimate DAG depth (and falsely report + cycles) or be implemented as cache-with-in-flight-tracking, which is just + topological ordering done badly. + +- **Lazy env binding.** Out of scope. May revisit if a specific consumer + forces it. + +### What this implies for Step 3 (incremental seams) + +- The first seam to extract is `LoadPlan` construction — specifically a + proper topological sort over the env DAG. Land it as a pure function used + by V1's `buildLoadGroups`. Verify with the diff harness that the sort is + equivalent to V1's current grouping (plus the `core-aspect-env` special + case becoming unnecessary). Only then move on. + +- The Enrichment phase is the last thing to extract, not the first. It's + the phase most entangled with the env recursion, and the easiest to get + wrong. Build everything else first. + +- The "two-pass" shape (load without extensions, then load extensions) is + not a bug to fix. It's the load order that makes the rest tractable. If + the rewrite collapses the two passes, it'll re-create PR #10086's + problems. + +### Evidence index + +| Claim | File:Line | +| ------------------------------------------- | -------------------------------------------------- | +| Bulk load passes `loadExtensions: false` | `workspace-component-loader.ts:127, 517, 576, 805` | +| `getEnvComponentByEnvId` calls `host.get` | `environments.main.runtime.ts:490-498` | +| Dep resolver calls `getEnvComponentByEnvId` | `dependency-resolver.main.runtime.ts:1221` | +| Env load order grouping (one level) | `workspace-component-loader.ts:358-378` | +| Hardcoded `core-aspect-env` ordering | `workspace-component-loader.ts:311-323` | +| `idsToNotLoadAsAspects` recursion break | `workspace-aspects-loader.ts:771-787` | +| `calculateEnv` is synchronous, no recursion | `environments.main.runtime.ts:657-716` | +| `componentLoadedSelfAsAspects` memoization | `workspace-component-loader.ts:457-480` | diff --git a/docs/rfcs/component-loading-rewrite/TASKS.md b/docs/rfcs/component-loading-rewrite/TASKS.md index 5dfed2c0d247..af62dbce0320 100644 --- a/docs/rfcs/component-loading-rewrite/TASKS.md +++ b/docs/rfcs/component-loading-rewrite/TASKS.md @@ -41,26 +41,37 @@ the previous approach was discarded. --- -## Step 2 — Recursion Root-Cause Spike (separate PR) +## Step 2 — Recursion Root-Cause Spike > Understand and decide before designing the new pipeline. -- [ ] Trace the env↔component recursion in V1; document the call chain -- [ ] Evaluate options: pre-pass, lazy env binding, cycle detection -- [ ] Write up findings in `DECISIONS.md` +- [x] Trace the env↔component recursion in V1; document the call chain +- [x] Evaluate options: pre-pass, lazy env binding, cycle detection +- [x] Write up findings in `DECISIONS.md` (D-001) - [ ] Get alignment on the approach before Step 3 +Conclusion (see D-001): the recursion is a topological-ordering problem, not +a true cycle. Keep V1's two-pass shape (bulk load with `loadExtensions: +false`, then load extensions). Replace V1's one-level env-of-env grouping +with a proper recursive topological sort. Don't introduce lazy env binding. + --- ## Step 3 — Incremental Seams (one PR each) > Each seam is green on the diff harness before merging. No big-bang switchover. - -- [ ] Extract `LoadPlan` construction as a pure function (V1 still drives loading) +> Order is deliberate: env-DAG sort first (D-001 says it's the load-order +> primitive everything else depends on), Enrichment last (most entangled with +> the recursion). + +- [ ] Extract env-DAG topological sort (replaces `regroupEnvsIdsFromTheList`, + makes `core-aspect-env` special case unnecessary) +- [ ] Extract `LoadPlan` construction (Discovery + Resolution) as a pure + function. V1 still drives loading. - [ ] Extract Hydration as a separate concern -- [ ] Extract Enrichment as a separate concern (depends on Step 2 outcome) - [ ] Extract Assembly - [ ] Extract Execution +- [ ] Extract Enrichment (last — most likely to surface env-recursion bugs) --- From 68bf880c52d568de5fe70c7ff08ebf8a8cddc104 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 10:38:11 -0400 Subject: [PATCH 03/17] refactor(workspace): extract env-DAG layering as a pure function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Behavior-preserving extraction of regroupEnvsIdsFromTheList into groupEnvsByDepLayer with unit tests. Sets up the seam to upgrade to a proper recursive topological sort in a follow-up — see D-001. --- .../workspace-component/env-dag-sort.spec.ts | 80 +++++++++++++++++++ .../workspace-component/env-dag-sort.ts | 51 ++++++++++++ .../workspace-component-loader.ts | 43 +++------- 3 files changed, 143 insertions(+), 31 deletions(-) create mode 100644 scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts create mode 100644 scopes/workspace/workspace/workspace-component/env-dag-sort.ts diff --git a/scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts b/scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts new file mode 100644 index 000000000000..4f7f4f691272 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts @@ -0,0 +1,80 @@ +import { expect } from 'chai'; +import { ComponentID } from '@teambit/component-id'; +import { groupEnvsByDepLayer } from './env-dag-sort'; + +function id(idStr: string): ComponentID { + return ComponentID.fromString(idStr); +} + +describe('groupEnvsByDepLayer', () => { + it('returns both layers empty for empty input', () => { + const [deeper, shallower] = groupEnvsByDepLayer([], () => undefined, new Set()); + expect(deeper).to.deep.equal([]); + expect(shallower).to.deep.equal([]); + }); + + it('puts every env in shallower when no env-of-env relationships exist in the list', () => { + const ids = [id('teambit.react/react@1.0.0'), id('teambit.node/node@1.0.0')]; + const [deeper, shallower] = groupEnvsByDepLayer(ids, () => undefined, new Set()); + expect(deeper).to.deep.equal([]); + expect(shallower).to.have.lengthOf(2); + }); + + it('layers correctly when one env is the env of another env in the list', () => { + // ReactEnv's env is BitEnv. Both are in the list. BitEnv must load first. + const reactEnv = id('teambit.react/react@1.0.0'); + const bitEnv = id('bitdev.general/envs/bit-env@1.0.0'); + const [deeper, shallower] = groupEnvsByDepLayer( + [reactEnv, bitEnv], + (envId) => (envId.toString() === reactEnv.toString() ? bitEnv.toStringWithoutVersion() : undefined), + new Set() + ); + expect(deeper.map((c) => c.toStringWithoutVersion())).to.deep.equal(['bitdev.general/envs/bit-env']); + expect(shallower.map((c) => c.toStringWithoutVersion())).to.deep.equal(['teambit.react/react']); + }); + + it('matches env-of-env by id without version', () => { + // The lookup returns an id-without-version; we must still match the versioned id in the list. + const reactEnv = id('teambit.react/react@1.0.0'); + const bitEnv = id('bitdev.general/envs/bit-env@2.5.0'); + const [deeper] = groupEnvsByDepLayer( + [reactEnv, bitEnv], + (envId) => (envId.toString() === reactEnv.toString() ? 'bitdev.general/envs/bit-env' : undefined), + new Set() + ); + expect(deeper).to.have.lengthOf(1); + expect(deeper[0].toString()).to.equal(bitEnv.toString()); + }); + + it('skips propagating env-of-env when the env is itself a workspace-component env', () => { + // Quirk preserved from V1's regroupEnvsIdsFromTheList. See env-dag-sort.ts. + const reactEnv = id('teambit.react/react@1.0.0'); + const bitEnv = id('bitdev.general/envs/bit-env@1.0.0'); + const wsEnvs = new Set([reactEnv.toString()]); + const envOf: Record = { + [reactEnv.toString()]: bitEnv.toStringWithoutVersion(), + [bitEnv.toString()]: undefined, + }; + const [deeper, shallower] = groupEnvsByDepLayer([reactEnv, bitEnv], (envId) => envOf[envId.toString()], wsEnvs); + // Because reactEnv is in wsEnvs, the reactEnv → bitEnv edge is NOT propagated + // into envsOfEnvs. Nothing else points at bitEnv. Both envs end up in shallower. + expect(deeper).to.deep.equal([]); + expect(shallower).to.have.lengthOf(2); + }); + + it('does NOT recursively layer env-of-env-of-env (one-level limitation, by design)', () => { + // This test pins V1's known limitation. When we make the sort recursive in a follow-up, + // this test should fail and be updated. + const a = id('scope/a@1.0.0'); + const b = id('scope/b@1.0.0'); + const c = id('scope/c@1.0.0'); + const envOf: Record = { + [a.toString()]: b.toStringWithoutVersion(), + [b.toString()]: c.toStringWithoutVersion(), + }; + const [deeper, shallower] = groupEnvsByDepLayer([a, b, c], (envId) => envOf[envId.toString()], new Set()); + // Old behavior: deeper = {b, c}, shallower = {a}. We don't get [[c], [b], [a]]. + expect(deeper.map((x) => x.toStringWithoutVersion()).sort()).to.deep.equal(['scope/b', 'scope/c']); + expect(shallower.map((x) => x.toStringWithoutVersion())).to.deep.equal(['scope/a']); + }); +}); diff --git a/scopes/workspace/workspace/workspace-component/env-dag-sort.ts b/scopes/workspace/workspace/workspace-component/env-dag-sort.ts new file mode 100644 index 000000000000..c1ba5f8948f2 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/env-dag-sort.ts @@ -0,0 +1,51 @@ +import type { ComponentID } from '@teambit/component-id'; + +/** + * Group env IDs into layers ordered by env-of-env depth. + * + * Returns `[deeper, shallower]`: + * - `deeper` — envs that are referenced as the env-of-env by some other env in + * the input list. They must load first because envs that depend on them load next. + * - `shallower` — envs not referenced as env-of-env by anyone else in the list. + * + * **Behavior-preserving extraction** of the algorithm previously inlined as + * `WorkspaceComponentLoader.regroupEnvsIdsFromTheList`. Quirks preserved: + * + * 1. Only one level of layering (envs of envs of envs is *not* handled — the + * deeper grouping isn't recursively split). See D-001 in + * `docs/rfcs/component-loading-rewrite/DECISIONS.md` for why and the plan + * to fix it. + * 2. The `envsIdsOfWsComps` filter: when an env in the input list is itself + * used directly by a workspace component, we don't propagate its + * env-of-env relationship into the deeper-layer set. The original intent + * is unclear from history; preserved as-is to avoid behavioral drift. + * Document the rationale before the next refactor touches it. + * + * @param envIds the envs to layer (typically, envs used by workspace components) + * @param getEnvOfEnv lookup: given an env, return the string id of ITS env (or undefined) + * @param envsIdsOfWsComps set of env id strings used directly by workspace components + * @returns `[deeper, shallower]` — load `deeper` first + */ +export function groupEnvsByDepLayer( + envIds: ComponentID[], + getEnvOfEnv: (id: ComponentID) => string | undefined, + envsIdsOfWsComps: Set +): [ComponentID[], ComponentID[]] { + const envsOfEnvs = new Set(); + for (const envId of envIds) { + const idStr = envId.toString(); + if (envsIdsOfWsComps.has(idStr)) continue; + const envOfEnvId = getEnvOfEnv(envId); + if (envOfEnvId) envsOfEnvs.add(envOfEnvId); + } + const deeper: ComponentID[] = []; + const shallower: ComponentID[] = []; + for (const id of envIds) { + if (envsOfEnvs.has(id.toString()) || envsOfEnvs.has(id.toStringWithoutVersion())) { + deeper.push(id); + } else { + shallower.push(id); + } + } + return [deeper, shallower]; +} diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 18654273bd6c..88cc99e4b004 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -25,6 +25,7 @@ import type { AspectLoaderMain } from '@teambit/aspect-loader'; import type { Workspace } from '../workspace'; import { WorkspaceComponent } from './workspace-component'; import { MergeConfigConflict } from '../exceptions/merge-config-conflict'; +import { groupEnvsByDepLayer } from './env-dag-sort'; type GetManyRes = { components: Component[]; @@ -341,40 +342,20 @@ export class WorkspaceComponentLoader { } /** - * This function will get a list of envs ids and will regroup them into two groups: - * 1. envs that are envs of envs from the group - * 2. other envs (envs which are just envs of regular components of the workspace) - * For Example: - * envsIds: [ReactEnv, NodeEnv, BitEnv] - * The env of ReactEnv and NodeEnv is BitEnv - * The result will be: - * [ [BitEnv], [ReactEnv, NodeEnv] ] - * - * At the moment this function is not recursive, in the future we might want to make it recursive - * @param envIds - * @param envsIdsOfWsComps - * @returns + * Layer envs so that envs-of-envs load before their dependents. + * Thin wrapper over `groupEnvsByDepLayer` — see that function for the algorithm + * and the one-level-only limitation. */ private regroupEnvsIdsFromTheList(envIds: ComponentID[] = [], envsIdsOfWsComps: Set): Array { - const envsOfEnvs = new Set(); - envIds.forEach((envId) => { - const idStr = envId.toString(); - const fromCache = this.componentsExtensionsCache.get(idStr); - if (!fromCache || !fromCache.extensions) { - return; - } - const envOfEnvId = fromCache.envId; - if (envOfEnvId && !envsIdsOfWsComps.has(idStr)) { - envsOfEnvs.add(envOfEnvId); - } - }); - const existingEnvsOfEnvs = envIds.filter( - (id) => envsOfEnvs.has(id.toString()) || envsOfEnvs.has(id.toStringWithoutVersion()) - ); - const notExistingEnvsOfEnvs = envIds.filter( - (id) => !envsOfEnvs.has(id.toString()) && !envsOfEnvs.has(id.toStringWithoutVersion()) + return groupEnvsByDepLayer( + envIds, + (envId) => { + const fromCache = this.componentsExtensionsCache.get(envId.toString()); + if (!fromCache || !fromCache.extensions) return undefined; + return fromCache.envId; + }, + envsIdsOfWsComps ); - return [existingEnvsOfEnvs, notExistingEnvsOfEnvs]; } private regroupExtIdsFromTheList(ids: ComponentID[]): Array { From 36e47348fcb89d5089b0ec56fb297f77c685e814 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 10:57:28 -0400 Subject: [PATCH 04/17] refactor(workspace): make env-DAG layering recursive Replaces V1's one-level partition with a proper topological sort over the env-of-env DAG. Cycles fall back to a single layer defensively. Per D-001, this is the load-order primitive the rewrite depends on. --- .../workspace-component/env-dag-sort.spec.ts | 82 ++++++++----- .../workspace-component/env-dag-sort.ts | 115 +++++++++++++----- .../workspace-component-loader.ts | 3 +- 3 files changed, 139 insertions(+), 61 deletions(-) diff --git a/scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts b/scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts index 4f7f4f691272..86d6afddf24b 100644 --- a/scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts +++ b/scopes/workspace/workspace/workspace-component/env-dag-sort.spec.ts @@ -6,48 +6,50 @@ function id(idStr: string): ComponentID { return ComponentID.fromString(idStr); } +function withoutVersion(c: ComponentID): string { + return c.toStringWithoutVersion(); +} + describe('groupEnvsByDepLayer', () => { - it('returns both layers empty for empty input', () => { - const [deeper, shallower] = groupEnvsByDepLayer([], () => undefined, new Set()); - expect(deeper).to.deep.equal([]); - expect(shallower).to.deep.equal([]); + it('returns an empty array for empty input', () => { + expect(groupEnvsByDepLayer([], () => undefined, new Set())).to.deep.equal([]); }); - it('puts every env in shallower when no env-of-env relationships exist in the list', () => { + it('returns a single layer when no env-of-env relationships exist in the list', () => { const ids = [id('teambit.react/react@1.0.0'), id('teambit.node/node@1.0.0')]; - const [deeper, shallower] = groupEnvsByDepLayer(ids, () => undefined, new Set()); - expect(deeper).to.deep.equal([]); - expect(shallower).to.have.lengthOf(2); + const layers = groupEnvsByDepLayer(ids, () => undefined, new Set()); + expect(layers).to.have.lengthOf(1); + expect(layers[0]).to.have.lengthOf(2); }); it('layers correctly when one env is the env of another env in the list', () => { // ReactEnv's env is BitEnv. Both are in the list. BitEnv must load first. const reactEnv = id('teambit.react/react@1.0.0'); const bitEnv = id('bitdev.general/envs/bit-env@1.0.0'); - const [deeper, shallower] = groupEnvsByDepLayer( + const layers = groupEnvsByDepLayer( [reactEnv, bitEnv], (envId) => (envId.toString() === reactEnv.toString() ? bitEnv.toStringWithoutVersion() : undefined), new Set() ); - expect(deeper.map((c) => c.toStringWithoutVersion())).to.deep.equal(['bitdev.general/envs/bit-env']); - expect(shallower.map((c) => c.toStringWithoutVersion())).to.deep.equal(['teambit.react/react']); + expect(layers).to.have.lengthOf(2); + expect(layers[0].map(withoutVersion)).to.deep.equal(['bitdev.general/envs/bit-env']); + expect(layers[1].map(withoutVersion)).to.deep.equal(['teambit.react/react']); }); it('matches env-of-env by id without version', () => { - // The lookup returns an id-without-version; we must still match the versioned id in the list. const reactEnv = id('teambit.react/react@1.0.0'); const bitEnv = id('bitdev.general/envs/bit-env@2.5.0'); - const [deeper] = groupEnvsByDepLayer( + const layers = groupEnvsByDepLayer( [reactEnv, bitEnv], (envId) => (envId.toString() === reactEnv.toString() ? 'bitdev.general/envs/bit-env' : undefined), new Set() ); - expect(deeper).to.have.lengthOf(1); - expect(deeper[0].toString()).to.equal(bitEnv.toString()); + expect(layers).to.have.lengthOf(2); + expect(layers[0]).to.have.lengthOf(1); + expect(layers[0][0].toString()).to.equal(bitEnv.toString()); }); it('skips propagating env-of-env when the env is itself a workspace-component env', () => { - // Quirk preserved from V1's regroupEnvsIdsFromTheList. See env-dag-sort.ts. const reactEnv = id('teambit.react/react@1.0.0'); const bitEnv = id('bitdev.general/envs/bit-env@1.0.0'); const wsEnvs = new Set([reactEnv.toString()]); @@ -55,16 +57,14 @@ describe('groupEnvsByDepLayer', () => { [reactEnv.toString()]: bitEnv.toStringWithoutVersion(), [bitEnv.toString()]: undefined, }; - const [deeper, shallower] = groupEnvsByDepLayer([reactEnv, bitEnv], (envId) => envOf[envId.toString()], wsEnvs); - // Because reactEnv is in wsEnvs, the reactEnv → bitEnv edge is NOT propagated - // into envsOfEnvs. Nothing else points at bitEnv. Both envs end up in shallower. - expect(deeper).to.deep.equal([]); - expect(shallower).to.have.lengthOf(2); + const layers = groupEnvsByDepLayer([reactEnv, bitEnv], (envId) => envOf[envId.toString()], wsEnvs); + // The reactEnv → bitEnv edge is NOT propagated, so they aren't ordered. + expect(layers).to.have.lengthOf(1); + expect(layers[0]).to.have.lengthOf(2); }); - it('does NOT recursively layer env-of-env-of-env (one-level limitation, by design)', () => { - // This test pins V1's known limitation. When we make the sort recursive in a follow-up, - // this test should fail and be updated. + it('recursively layers env-of-env-of-env', () => { + // A → B → C: load C, then B, then A. const a = id('scope/a@1.0.0'); const b = id('scope/b@1.0.0'); const c = id('scope/c@1.0.0'); @@ -72,9 +72,35 @@ describe('groupEnvsByDepLayer', () => { [a.toString()]: b.toStringWithoutVersion(), [b.toString()]: c.toStringWithoutVersion(), }; - const [deeper, shallower] = groupEnvsByDepLayer([a, b, c], (envId) => envOf[envId.toString()], new Set()); - // Old behavior: deeper = {b, c}, shallower = {a}. We don't get [[c], [b], [a]]. - expect(deeper.map((x) => x.toStringWithoutVersion()).sort()).to.deep.equal(['scope/b', 'scope/c']); - expect(shallower.map((x) => x.toStringWithoutVersion())).to.deep.equal(['scope/a']); + const layers = groupEnvsByDepLayer([a, b, c], (envId) => envOf[envId.toString()], new Set()); + expect(layers.map((layer) => layer.map(withoutVersion))).to.deep.equal([['scope/c'], ['scope/b'], ['scope/a']]); + }); + + it('layers diamond shapes correctly (two envs share an env-of-env)', () => { + // A → C, B → C: C loads first, then [A, B] together. + const a = id('scope/a@1.0.0'); + const b = id('scope/b@1.0.0'); + const c = id('scope/c@1.0.0'); + const envOf: Record = { + [a.toString()]: c.toStringWithoutVersion(), + [b.toString()]: c.toStringWithoutVersion(), + }; + const layers = groupEnvsByDepLayer([a, b, c], (envId) => envOf[envId.toString()], new Set()); + expect(layers).to.have.lengthOf(2); + expect(layers[0].map(withoutVersion)).to.deep.equal(['scope/c']); + expect(layers[1].map(withoutVersion).sort()).to.deep.equal(['scope/a', 'scope/b']); + }); + + it('falls back to a single layer if a cycle is detected', () => { + // A → B → A. Should not infinite-loop. Returns [[A, B]] defensively. + const a = id('scope/a@1.0.0'); + const b = id('scope/b@1.0.0'); + const envOf: Record = { + [a.toString()]: b.toStringWithoutVersion(), + [b.toString()]: a.toStringWithoutVersion(), + }; + const layers = groupEnvsByDepLayer([a, b], (envId) => envOf[envId.toString()], new Set()); + expect(layers).to.have.lengthOf(1); + expect(layers[0]).to.have.lengthOf(2); }); }); diff --git a/scopes/workspace/workspace/workspace-component/env-dag-sort.ts b/scopes/workspace/workspace/workspace-component/env-dag-sort.ts index c1ba5f8948f2..b6b6d2d3175d 100644 --- a/scopes/workspace/workspace/workspace-component/env-dag-sort.ts +++ b/scopes/workspace/workspace/workspace-component/env-dag-sort.ts @@ -1,51 +1,104 @@ import type { ComponentID } from '@teambit/component-id'; /** - * Group env IDs into layers ordered by env-of-env depth. + * Topologically layer env IDs by env-of-env depth. * - * Returns `[deeper, shallower]`: - * - `deeper` — envs that are referenced as the env-of-env by some other env in - * the input list. They must load first because envs that depend on them load next. - * - `shallower` — envs not referenced as env-of-env by anyone else in the list. + * Returns layers in load order: layer 0 is "deepest" (no env-of-env among the + * input list) and must load first; layer N is "shallowest" and loads last. * - * **Behavior-preserving extraction** of the algorithm previously inlined as - * `WorkspaceComponentLoader.regroupEnvsIdsFromTheList`. Quirks preserved: + * Example: + * envIds: [ReactEnv, BitEnv] + * ReactEnv's env is BitEnv, BitEnv's env is not in the list + * → [[BitEnv], [ReactEnv]] (load BitEnv first) * - * 1. Only one level of layering (envs of envs of envs is *not* handled — the - * deeper grouping isn't recursively split). See D-001 in - * `docs/rfcs/component-loading-rewrite/DECISIONS.md` for why and the plan - * to fix it. - * 2. The `envsIdsOfWsComps` filter: when an env in the input list is itself - * used directly by a workspace component, we don't propagate its - * env-of-env relationship into the deeper-layer set. The original intent - * is unclear from history; preserved as-is to avoid behavioral drift. - * Document the rationale before the next refactor touches it. + * envIds: [A, B, C] + * A's env is B, B's env is C, C's env is not in the list + * → [[C], [B], [A]] (load C, then B, then A) + * + * envIds: [A, B] with no env-of-env relationships in the list + * → [[A, B]] (single layer) + * + * Quirk preserved from V1's `regroupEnvsIdsFromTheList`: when an env in the + * input list is itself used directly by a workspace component + * (`envsIdsOfWsComps`), its env-of-env edge is *not* propagated. The original + * intent is unclear from history; preserved as-is to avoid behavioral drift. + * Document the rationale before the next refactor touches it. + * + * Cycles are not expected in real env DAGs. If one is detected, the function + * returns all envs in a single layer (defensive — better than infinite recursion). * * @param envIds the envs to layer (typically, envs used by workspace components) * @param getEnvOfEnv lookup: given an env, return the string id of ITS env (or undefined) * @param envsIdsOfWsComps set of env id strings used directly by workspace components - * @returns `[deeper, shallower]` — load `deeper` first + * @returns layers in load order — `result[0]` loads first */ export function groupEnvsByDepLayer( envIds: ComponentID[], getEnvOfEnv: (id: ComponentID) => string | undefined, envsIdsOfWsComps: Set -): [ComponentID[], ComponentID[]] { - const envsOfEnvs = new Set(); - for (const envId of envIds) { - const idStr = envId.toString(); - if (envsIdsOfWsComps.has(idStr)) continue; - const envOfEnvId = getEnvOfEnv(envId); - if (envOfEnvId) envsOfEnvs.add(envOfEnvId); +): ComponentID[][] { + if (envIds.length === 0) return []; + + // Index input by both with-version and without-version, since lookups in V1 + // matched both forms. + const idIndex = new Map(); + for (const id of envIds) { + idIndex.set(id.toString(), id); + idIndex.set(id.toStringWithoutVersion(), id); } - const deeper: ComponentID[] = []; - const shallower: ComponentID[] = []; + + // For each env, its env-of-env *if that env is also in the input list*. Otherwise null. + const dependsOn = new Map(); for (const id of envIds) { - if (envsOfEnvs.has(id.toString()) || envsOfEnvs.has(id.toStringWithoutVersion())) { - deeper.push(id); - } else { - shallower.push(id); + const idStr = id.toString(); + if (envsIdsOfWsComps.has(idStr)) { + // Quirk: skip propagating the env-of-env edge when this env is a ws-comp env. + dependsOn.set(idStr, null); + continue; + } + const envOfEnvId = getEnvOfEnv(id); + const target = envOfEnvId ? idIndex.get(envOfEnvId) : undefined; + dependsOn.set(idStr, target ?? null); + } + + // Compute depth via memoized DFS, with cycle detection. + const depthCache = new Map(); + const inProgress = new Set(); + let cycleDetected = false; + + function depth(id: ComponentID): number { + const idStr = id.toString(); + if (depthCache.has(idStr)) return depthCache.get(idStr)!; + if (inProgress.has(idStr)) { + cycleDetected = true; + return 0; } + inProgress.add(idStr); + const dep = dependsOn.get(idStr); + const d = dep ? 1 + depth(dep) : 0; + inProgress.delete(idStr); + depthCache.set(idStr, d); + return d; + } + + for (const id of envIds) depth(id); + if (cycleDetected) return [envIds]; + + // Group by depth. + const byDepth = new Map(); + let maxDepth = 0; + for (const id of envIds) { + const d = depthCache.get(id.toString())!; + if (d > maxDepth) maxDepth = d; + if (!byDepth.has(d)) byDepth.set(d, []); + byDepth.get(d)!.push(id); + } + + // Emit ascending: depth 0 (deepest, must load first) → depth maxDepth (loads last). + const layers: ComponentID[][] = []; + for (let d = 0; d <= maxDepth; d++) { + const layer = byDepth.get(d); + if (layer && layer.length > 0) layers.push(layer); } - return [deeper, shallower]; + return layers; } diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 88cc99e4b004..9b29635ad244 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -343,8 +343,7 @@ export class WorkspaceComponentLoader { /** * Layer envs so that envs-of-envs load before their dependents. - * Thin wrapper over `groupEnvsByDepLayer` — see that function for the algorithm - * and the one-level-only limitation. + * Thin wrapper over `groupEnvsByDepLayer` — see that function for the algorithm. */ private regroupEnvsIdsFromTheList(envIds: ComponentID[] = [], envsIdsOfWsComps: Set): Array { return groupEnvsByDepLayer( From 6b79ac624b9712a1c4de9091d9b0d14f48a82fcb Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 15:36:36 -0400 Subject: [PATCH 05/17] feat(workspace): add sampling to loader diff harness + bit show coverage Adds BIT_LOADER_DIFF_SAMPLE=N to skip the partner on most calls. Lets the harness run on workspaces big enough that V1+V1 in parallel would OOM the default 4GB heap. Adds a bit-show e2e (the previous "bit show isn't covered" report turned out to be a stale-compile artifact). Documents actual command coverage in SNAPSHOT-CONTRACT.md. --- .../SNAPSHOT-CONTRACT.md | 27 +++++++++++++++++++ e2e/harmony/loader-diff-harness.e2e.ts | 27 ++++++++++++++----- .../loader-diff/harness.ts | 9 +++++++ .../workspace-component/loader-diff/index.ts | 12 +++++++++ scopes/workspace/workspace/workspace.ts | 8 ++++-- 5 files changed, 75 insertions(+), 8 deletions(-) diff --git a/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md index 863766978c58..10863878fd6e 100644 --- a/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md +++ b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md @@ -57,3 +57,30 @@ V1 and the rewritten loader and the harness reports zero diffs), add the relevant field here, update `snapshot.ts`, and rerun the V1-vs-V1 baseline. If V1-vs-V1 isn't zero on the new field, the field needs a normalization rule before it can join the contract. + +## Command coverage + +The harness wraps `Workspace.componentLoader`, so any command that loads +components via `workspace.get` / `workspace.getMany` / `workspace.getIfExist` +is covered. Confirmed: + +- `bit status` — covered (loads components, then compares against scope state). +- `bit show ` — covered (calls `host.get(id)` which routes to the loader). + +**Not covered, by design:** + +- `bit list` — only reads `consumer.bitMap.bitmapIdsFromCurrentLane`. No + components are loaded. The harness can't observe what doesn't run. + +If you find a command that loads components but doesn't trigger the harness, +that's a real gap — either the command is using a different loader path +(`consumer.loadComponents` directly, scope-only loading, etc.) or the wrapping +in `Workspace`'s constructor missed something. File it as a follow-up. + +## Sample rate (`BIT_LOADER_DIFF_SAMPLE`) + +On large workspaces (bit7 itself, ~300 components), running both loaders for +every call doubles cache footprint and can OOM Node's default 4GB heap. Set +`BIT_LOADER_DIFF_SAMPLE=10` to run the partner only every 10th call. The +trade-off: sampling can miss regressions that only manifest on the +non-sampled calls. Use the lowest sample rate the workspace can afford. diff --git a/e2e/harmony/loader-diff-harness.e2e.ts b/e2e/harmony/loader-diff-harness.e2e.ts index 110716b8e3c3..1e36fe206bd3 100644 --- a/e2e/harmony/loader-diff-harness.e2e.ts +++ b/e2e/harmony/loader-diff-harness.e2e.ts @@ -72,13 +72,25 @@ describe('loader diff harness — V1-vs-V1 baseline', function () { `expected zero diffs but got: ${JSON.stringify(dataLines(lines), null, 2)}` ).to.have.lengthOf(0); }); + + it('bit show writes a harness header and produces zero diffs', () => { + helper.command.runCmd(`bit show comp1`, helper.scopes.localPath, 'pipe', undefined, false, { + BIT_LOADER_DIFF: '1', + BIT_LOADER_DIFF_OUT: logPath, + }); + const lines = readDiffLog(logPath); + expect( + lines.filter((l) => l.header), + 'expected at least one header line' + ).to.have.length.greaterThan(0); + expect( + dataLines(lines), + `expected zero diffs but got: ${JSON.stringify(dataLines(lines), null, 2)}` + ).to.have.lengthOf(0); + }); }); - // TODO: re-enable once the harness's memory footprint is acceptable on workspaces - // with scope state. Today, running two WorkspaceComponentLoader instances in parallel - // doubles the cache footprint and can OOM Node's default 4GB heap even on tiny - // workspaces. Likely needs a sampling mode or a lighter-weight partner. - describe.skip('workspace with tagged + modified component', () => { + describe('workspace with tagged + modified component', () => { before(() => { helper.scopeHelper.setWorkspaceWithRemoteScope(); helper.fixtures.populateComponents(1); @@ -86,10 +98,13 @@ describe('loader diff harness — V1-vs-V1 baseline', function () { helper.fs.appendFile('comp1/index.js', '\n// modified after tag'); }); - it('bit status on a modified component produces zero diffs', () => { + it('bit status on a modified component produces zero diffs (sampled)', () => { + // BIT_LOADER_DIFF_SAMPLE=50 — workspaces with scope state make many loader + // calls per command; sampling caps the partner's footprint. helper.command.runCmd(`bit status`, helper.scopes.localPath, 'pipe', undefined, false, { BIT_LOADER_DIFF: '1', BIT_LOADER_DIFF_OUT: logPath, + BIT_LOADER_DIFF_SAMPLE: '50', }); const lines = readDiffLog(logPath); expect( diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts b/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts index 27c22b407a75..9dd79d3d9462 100644 --- a/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts +++ b/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts @@ -22,6 +22,12 @@ export interface LoaderDiffHarnessOptions { outputPath?: string; /** Tag written into each log line, e.g. "v1-vs-v1" or "v1-vs-v2". */ comparisonLabel: string; + /** + * Sample rate. If `N > 1`, only every Nth call runs the partner loader. + * Reduces overhead on large workspaces where running both loaders for every + * call would OOM Node's default heap. Default: 1 (every call). + */ + sampleEvery?: number; } /** @@ -38,6 +44,7 @@ export class LoaderDiffHarness { private partner: WorkspaceComponentLoader | null = null; private readonly outputPath: string; private readonly comparisonLabel: string; + private readonly sampleEvery: number; private callIndex = 0; private writeFailureLogged = false; @@ -49,6 +56,7 @@ export class LoaderDiffHarness { ) { this.outputPath = options.outputPath ?? path.join(os.tmpdir(), 'bit-loader-diff.jsonl'); this.comparisonLabel = options.comparisonLabel; + this.sampleEvery = Math.max(1, Math.floor(options.sampleEvery ?? 1)); this.writeHeader(); } @@ -116,6 +124,7 @@ export class LoaderDiffHarness { runPartner: () => Promise ): Promise { const callId = this.callIndex++; + if (callId % this.sampleEvery !== 0) return; let partnerComponents: Component[]; try { partnerComponents = await runPartner(); diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/index.ts b/scopes/workspace/workspace/workspace-component/loader-diff/index.ts index 015292aa2f1f..b51a2dbda5ce 100644 --- a/scopes/workspace/workspace/workspace-component/loader-diff/index.ts +++ b/scopes/workspace/workspace/workspace-component/loader-diff/index.ts @@ -16,3 +16,15 @@ export function loaderDiffMode(): string | null { if (raw === '1' || raw.toLowerCase() === 'true') return 'v1-vs-v1'; return raw; } + +/** + * Returns the sample rate from `BIT_LOADER_DIFF_SAMPLE`. Default 1 (every call). + * Use larger values on workspaces big enough that running both loaders for every + * call would OOM, e.g. `BIT_LOADER_DIFF_SAMPLE=10`. + */ +export function loaderDiffSampleEvery(): number { + const raw = process.env.BIT_LOADER_DIFF_SAMPLE; + if (!raw) return 1; + const parsed = Number.parseInt(raw, 10); + return Number.isFinite(parsed) && parsed >= 1 ? parsed : 1; +} diff --git a/scopes/workspace/workspace/workspace.ts b/scopes/workspace/workspace/workspace.ts index d119067470ad..4b68f7b17880 100644 --- a/scopes/workspace/workspace/workspace.ts +++ b/scopes/workspace/workspace/workspace.ts @@ -112,7 +112,7 @@ import { CompFiles } from './workspace-component/comp-files'; import { Filter } from './filter'; import type { ComponentStatusLegacy, ComponentStatusResult } from './workspace-component/component-status-loader'; import { ComponentStatusLoader } from './workspace-component/component-status-loader'; -import { LoaderDiffHarness, loaderDiffMode } from './workspace-component/loader-diff'; +import { LoaderDiffHarness, loaderDiffMode, loaderDiffSampleEvery } from './workspace-component/loader-diff'; import execa from 'execa'; import { getGitExecutablePath } from '@teambit/git.modules.git-executable'; import { VERSION_ZERO } from '@teambit/objects'; @@ -273,7 +273,11 @@ export class Workspace implements ComponentFactory { primaryLoader, () => new WorkspaceComponentLoader(this, logger, dependencyResolver, envs, aspectLoader), logger, - { comparisonLabel: diffMode, outputPath: process.env.BIT_LOADER_DIFF_OUT } + { + comparisonLabel: diffMode, + outputPath: process.env.BIT_LOADER_DIFF_OUT, + sampleEvery: loaderDiffSampleEvery(), + } ) as unknown as WorkspaceComponentLoader; } else { this.componentLoader = primaryLoader; From 883adcc14a80c1e24efc9276388f2d377bd49c6f Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 15:46:12 -0400 Subject: [PATCH 06/17] refactor(workspace): drop hardcoded core-aspect-env special case in buildLoadGroups D-001 predicted that the recursive env-DAG sort would make the prepended 'always load core-aspect-env first' workaround unnecessary. Verified: 311-component bit7 workspace (the workspace the special case was written for) loads cleanly without it. --- .../workspace-component-loader.ts | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 9b29635ad244..73a6de3394d0 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -301,29 +301,16 @@ export class WorkspaceComponentLoader { const groupsToHandle = [ // Always load first core envs { ids: groupedByIsCoreEnvs.true || [], core: true, aspects: true, seeders: true, envs: true }, - // { ids: groupedByIsEnvOfWsComps.true || [], core: false, aspects: true, seeders: false, envs: true }, + // The recursive env-DAG sort in `groupEnvsByDepLayer` places envs at the + // bottom of the chain (e.g. `core-aspect-env`/`core-aspect-env-jest`) in the + // earliest layer naturally. The hardcoded `core-aspect-env` prepending that + // used to live here is redundant under the recursive sort — see D-001. ...layeredEnvsGroups, { ids: extsNotFromTheList || [], core: false, aspects: true, seeders: false, envs: false }, ...layeredExtGroups, { ids: groupedByIsExtOfAnother.false || [], core: false, aspects: false, seeders: true, envs: false }, ]; - // This is a special use case mostly for the bit core repo - const envsOfCoreAspectEnv = ['teambit.harmony/envs/core-aspect-env', 'teambit.harmony/envs/core-aspect-env-jest']; - const coreAspectEnvGroup = { ids: [], core: true, aspects: true, seeders: true, envs: true }; - layeredEnvsGroups.forEach((group) => { - const filteredIds = group.ids.filter((id) => envsOfCoreAspectEnv.includes(id.toStringWithoutVersion())); - if (filteredIds.length) { - // @ts-ignore - coreAspectEnvGroup.ids.push(...filteredIds); - } - }); - if (coreAspectEnvGroup.ids.length) { - // enter first in the list - groupsToHandle.unshift(coreAspectEnvGroup); - } - // END of bit repo special use case - const groupsByWsScope = groupsToHandle.map((group) => { if (!group.ids?.length) return undefined; const groupedByWsScope = groupBy(group.ids, (id) => { From 0b9905a6b30e880a538a635297fafd1f1742c5c1 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 16:03:56 -0400 Subject: [PATCH 07/17] refactor(workspace): layer extensions topologically (was a no-op TODO) Extracts a generic topo-sort helper (topoLayerByDeps) and uses it for both envs (refactored to share) and extensions. regroupExtIdsFromTheList was a documented no-op TODO; this implements it. Same shape fix as #10350 for envs, applied to ext-of-ext chains. --- .../workspace-component/dep-dag-sort.spec.ts | 116 ++++++++++++++++++ .../workspace-component/dep-dag-sort.ts | 113 +++++++++++++++++ .../workspace-component/env-dag-sort.ts | 72 ++--------- .../workspace-component-loader.ts | 29 +++-- 4 files changed, 260 insertions(+), 70 deletions(-) create mode 100644 scopes/workspace/workspace/workspace-component/dep-dag-sort.spec.ts create mode 100644 scopes/workspace/workspace/workspace-component/dep-dag-sort.ts diff --git a/scopes/workspace/workspace/workspace-component/dep-dag-sort.spec.ts b/scopes/workspace/workspace/workspace-component/dep-dag-sort.spec.ts new file mode 100644 index 000000000000..713e0d45ca24 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/dep-dag-sort.spec.ts @@ -0,0 +1,116 @@ +import { expect } from 'chai'; +import { ComponentID } from '@teambit/component-id'; +import { groupExtsByDepLayer, topoLayerByDeps } from './dep-dag-sort'; + +function id(idStr: string): ComponentID { + return ComponentID.fromString(idStr); +} + +function withoutVersion(c: ComponentID): string { + return c.toStringWithoutVersion(); +} + +describe('groupExtsByDepLayer', () => { + it('returns an empty array for empty input', () => { + expect(groupExtsByDepLayer([], () => [])).to.deep.equal([]); + }); + + it('returns a single layer when no ext-of-ext relationships exist in the list', () => { + const ids = [id('scope/ext-a@1.0.0'), id('scope/ext-b@1.0.0')]; + const layers = groupExtsByDepLayer(ids, () => []); + expect(layers).to.have.lengthOf(1); + expect(layers[0]).to.have.lengthOf(2); + }); + + it('layers extA before its dep extB when extA depends on extB', () => { + const extA = id('scope/ext-a@1.0.0'); + const extB = id('scope/ext-b@1.0.0'); + const layers = groupExtsByDepLayer([extA, extB], (i) => + i.toString() === extA.toString() ? [extB.toStringWithoutVersion()] : [] + ); + expect(layers.map((l) => l.map(withoutVersion))).to.deep.equal([['scope/ext-b'], ['scope/ext-a']]); + }); + + it('handles multi-level chains (extA → extB → extC)', () => { + const a = id('scope/a@1.0.0'); + const b = id('scope/b@1.0.0'); + const c = id('scope/c@1.0.0'); + const deps: Record = { + [a.toString()]: [b.toStringWithoutVersion()], + [b.toString()]: [c.toStringWithoutVersion()], + }; + const layers = groupExtsByDepLayer([a, b, c], (i) => deps[i.toString()] ?? []); + expect(layers.map((l) => l.map(withoutVersion))).to.deep.equal([['scope/c'], ['scope/b'], ['scope/a']]); + }); + + it('handles diamonds (extA + extB both depend on extC)', () => { + const a = id('scope/a@1.0.0'); + const b = id('scope/b@1.0.0'); + const c = id('scope/c@1.0.0'); + const deps: Record = { + [a.toString()]: [c.toStringWithoutVersion()], + [b.toString()]: [c.toStringWithoutVersion()], + }; + const layers = groupExtsByDepLayer([a, b, c], (i) => deps[i.toString()] ?? []); + expect(layers).to.have.lengthOf(2); + expect(layers[0].map(withoutVersion)).to.deep.equal(['scope/c']); + expect(layers[1].map(withoutVersion).sort()).to.deep.equal(['scope/a', 'scope/b']); + }); + + it('handles fan-out: one extension with multiple deps', () => { + // extA depends on both extB and extC. + const a = id('scope/a@1.0.0'); + const b = id('scope/b@1.0.0'); + const c = id('scope/c@1.0.0'); + const deps: Record = { + [a.toString()]: [b.toStringWithoutVersion(), c.toStringWithoutVersion()], + }; + const layers = groupExtsByDepLayer([a, b, c], (i) => deps[i.toString()] ?? []); + expect(layers).to.have.lengthOf(2); + expect(layers[0].map(withoutVersion).sort()).to.deep.equal(['scope/b', 'scope/c']); + expect(layers[1].map(withoutVersion)).to.deep.equal(['scope/a']); + }); + + it('ignores deps not present in the input list', () => { + const a = id('scope/a@1.0.0'); + const layers = groupExtsByDepLayer([a], () => ['scope/not-in-list']); + expect(layers).to.have.lengthOf(1); + expect(layers[0]).to.have.lengthOf(1); + }); + + it('falls back to a single layer if a cycle is detected', () => { + const a = id('scope/a@1.0.0'); + const b = id('scope/b@1.0.0'); + const deps: Record = { + [a.toString()]: [b.toStringWithoutVersion()], + [b.toString()]: [a.toStringWithoutVersion()], + }; + const layers = groupExtsByDepLayer([a, b], (i) => deps[i.toString()] ?? []); + expect(layers).to.have.lengthOf(1); + expect(layers[0]).to.have.lengthOf(2); + }); +}); + +describe('topoLayerByDeps (generic)', () => { + it('layers strings by dependency depth', () => { + const items = ['a', 'b', 'c']; + const deps: Record = { a: ['b'], b: ['c'] }; + const layers = topoLayerByDeps( + items, + (s) => s, + (s) => [s], + (s) => deps[s] ?? [] + ); + expect(layers).to.deep.equal([['c'], ['b'], ['a']]); + }); + + it('handles self-references gracefully (treats as no dependency)', () => { + const layers = topoLayerByDeps( + ['a'], + (s) => s, + (s) => [s], + (s) => [s] + ); + expect(layers).to.deep.equal([['a']]); + }); +}); diff --git a/scopes/workspace/workspace/workspace-component/dep-dag-sort.ts b/scopes/workspace/workspace/workspace-component/dep-dag-sort.ts new file mode 100644 index 000000000000..7d1ec65188d0 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/dep-dag-sort.ts @@ -0,0 +1,113 @@ +import type { ComponentID } from '@teambit/component-id'; + +/** + * Topologically layer a set of items by dependency depth. + * + * Layer 0 is "deepest" — items that nothing else in the input depends on + * (or whose deps aren't in the input). They load first. Layer N is items + * whose deps are at most N hops away in the input. + * + * Cycles are not expected for component DAGs (envs, extensions). If one is + * detected, the function returns a single layer containing all items — + * defensive against infinite recursion, never silently produces partial output. + * + * @param items the items to layer + * @param keyOf primary lookup key for an item (typically `id.toString()`) + * @param keyVariantsOf alternative keys an item may be referenced by + * (e.g. `[id.toString(), id.toStringWithoutVersion()]`) + * @param getDepKeys for an item, return the keys of its dependencies. Keys not + * present in the input are ignored. May return any of the + * variants accepted by `keyVariantsOf`. + */ +export function topoLayerByDeps( + items: T[], + keyOf: (item: T) => string, + keyVariantsOf: (item: T) => string[], + getDepKeys: (item: T) => string[] +): T[][] { + if (items.length === 0) return []; + + const itemByKey = new Map(); + for (const item of items) { + for (const variant of keyVariantsOf(item)) itemByKey.set(variant, item); + } + + // For each item, the subset of its declared deps that are present in the input. + const depsInInput = new Map(); + for (const item of items) { + const deps = getDepKeys(item) + .map((depKey) => itemByKey.get(depKey)) + .filter((dep): dep is T => dep !== undefined && dep !== item); + depsInInput.set(keyOf(item), deps); + } + + const depthCache = new Map(); + const inProgress = new Set(); + let cycleDetected = false; + + const computeDepth = (item: T): number => { + const k = keyOf(item); + const cached = depthCache.get(k); + if (cached !== undefined) return cached; + if (inProgress.has(k)) { + cycleDetected = true; + return 0; + } + inProgress.add(k); + const deps = depsInInput.get(k) ?? []; + let maxDepDepth = -1; + for (const dep of deps) { + const d = computeDepth(dep); + if (d > maxDepDepth) maxDepDepth = d; + } + inProgress.delete(k); + const depth = maxDepDepth + 1; + depthCache.set(k, depth); + return depth; + }; + + for (const item of items) computeDepth(item); + if (cycleDetected) return [items]; + + const byDepth = new Map(); + let maxDepth = 0; + for (const item of items) { + const d = depthCache.get(keyOf(item))!; + if (d > maxDepth) maxDepth = d; + if (!byDepth.has(d)) byDepth.set(d, []); + byDepth.get(d)!.push(item); + } + + const layers: T[][] = []; + for (let d = 0; d <= maxDepth; d++) { + const layer = byDepth.get(d); + if (layer && layer.length > 0) layers.push(layer); + } + return layers; +} + +const componentIdKeyVariants = (id: ComponentID): string[] => [id.toString(), id.toStringWithoutVersion()]; + +/** + * Layer extension IDs by extension-of-extension depth. + * + * Example: compA's extensions list contains extA. extA's own extensions list + * contains extB. Loading order: extB → extA → compA. This function takes the + * input `[extA, extB]` and returns `[[extB], [extA]]` so the caller can load + * each layer in turn. + * + * @param extIds the extensions to layer + * @param getExtsOfExt for an extension, return the string ids of its own extensions + * (with-version or without-version both accepted) + */ +export function groupExtsByDepLayer( + extIds: ComponentID[], + getExtsOfExt: (id: ComponentID) => string[] +): ComponentID[][] { + return topoLayerByDeps( + extIds, + (id) => id.toString(), + componentIdKeyVariants, + (id) => getExtsOfExt(id) + ); +} diff --git a/scopes/workspace/workspace/workspace-component/env-dag-sort.ts b/scopes/workspace/workspace/workspace-component/env-dag-sort.ts index b6b6d2d3175d..02e1aaab5ce2 100644 --- a/scopes/workspace/workspace/workspace-component/env-dag-sort.ts +++ b/scopes/workspace/workspace/workspace-component/env-dag-sort.ts @@ -1,4 +1,5 @@ import type { ComponentID } from '@teambit/component-id'; +import { topoLayerByDeps } from './dep-dag-sort'; /** * Topologically layer env IDs by env-of-env depth. @@ -37,68 +38,15 @@ export function groupEnvsByDepLayer( getEnvOfEnv: (id: ComponentID) => string | undefined, envsIdsOfWsComps: Set ): ComponentID[][] { - if (envIds.length === 0) return []; - - // Index input by both with-version and without-version, since lookups in V1 - // matched both forms. - const idIndex = new Map(); - for (const id of envIds) { - idIndex.set(id.toString(), id); - idIndex.set(id.toStringWithoutVersion(), id); - } - - // For each env, its env-of-env *if that env is also in the input list*. Otherwise null. - const dependsOn = new Map(); - for (const id of envIds) { - const idStr = id.toString(); - if (envsIdsOfWsComps.has(idStr)) { + return topoLayerByDeps( + envIds, + (id) => id.toString(), + (id) => [id.toString(), id.toStringWithoutVersion()], + (id) => { // Quirk: skip propagating the env-of-env edge when this env is a ws-comp env. - dependsOn.set(idStr, null); - continue; - } - const envOfEnvId = getEnvOfEnv(id); - const target = envOfEnvId ? idIndex.get(envOfEnvId) : undefined; - dependsOn.set(idStr, target ?? null); - } - - // Compute depth via memoized DFS, with cycle detection. - const depthCache = new Map(); - const inProgress = new Set(); - let cycleDetected = false; - - function depth(id: ComponentID): number { - const idStr = id.toString(); - if (depthCache.has(idStr)) return depthCache.get(idStr)!; - if (inProgress.has(idStr)) { - cycleDetected = true; - return 0; + if (envsIdsOfWsComps.has(id.toString())) return []; + const envOfEnv = getEnvOfEnv(id); + return envOfEnv ? [envOfEnv] : []; } - inProgress.add(idStr); - const dep = dependsOn.get(idStr); - const d = dep ? 1 + depth(dep) : 0; - inProgress.delete(idStr); - depthCache.set(idStr, d); - return d; - } - - for (const id of envIds) depth(id); - if (cycleDetected) return [envIds]; - - // Group by depth. - const byDepth = new Map(); - let maxDepth = 0; - for (const id of envIds) { - const d = depthCache.get(id.toString())!; - if (d > maxDepth) maxDepth = d; - if (!byDepth.has(d)) byDepth.set(d, []); - byDepth.get(d)!.push(id); - } - - // Emit ascending: depth 0 (deepest, must load first) → depth maxDepth (loads last). - const layers: ComponentID[][] = []; - for (let d = 0; d <= maxDepth; d++) { - const layer = byDepth.get(d); - if (layer && layer.length > 0) layers.push(layer); - } - return layers; + ); } diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 73a6de3394d0..3002df75a046 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -26,6 +26,7 @@ import type { Workspace } from '../workspace'; import { WorkspaceComponent } from './workspace-component'; import { MergeConfigConflict } from '../exceptions/merge-config-conflict'; import { groupEnvsByDepLayer } from './env-dag-sort'; +import { groupExtsByDepLayer } from './dep-dag-sort'; type GetManyRes = { components: Component[]; @@ -344,14 +345,26 @@ export class WorkspaceComponentLoader { ); } - private regroupExtIdsFromTheList(ids: ComponentID[]): Array { - // TODO: implement this function - // this should handle a case when you have: - // compA that has extA and that extA has extB - // in that case we now get the following group: - // ids: [extA, extB] - // while we need extB to be in a different group before extA - return [ids]; + /** + * Layer extension IDs so that extensions-of-extensions load before their dependents. + * Thin wrapper over `groupExtsByDepLayer` — see that function for the algorithm. + * + * Resolves an extension's own extensions via `componentsExtensionsCache`, which + * `populateScopeAndExtensionsCache` populates for all extensions in the load list. + * Returns extension ids in both with-version and without-version forms; the + * topo-sort matches either against the input list. + */ + private regroupExtIdsFromTheList(ids: ComponentID[] = []): Array { + return groupExtsByDepLayer(ids, (extId) => { + const fromCache = this.componentsExtensionsCache.get(extId.toString()); + if (!fromCache?.extensions) return []; + const depKeys: string[] = []; + for (const ext of fromCache.extensions) { + if (ext.stringId) depKeys.push(ext.stringId); + if (ext.newExtensionId) depKeys.push(ext.newExtensionId.toStringWithoutVersion()); + } + return depKeys; + }); } private async getAndLoadSlot( From c287ab3b1737639b5a8f29fae530a62d8a763794 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 16:10:54 -0400 Subject: [PATCH 08/17] =?UTF-8?q?feat(workspace):=20expand=20snapshot=20co?= =?UTF-8?q?ntract=20=E2=80=94=20extensions=20and=20aspects=20(config=20onl?= =?UTF-8?q?y)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds extensions[].config and aspects[].config to the diff snapshot, sorted by id with stable JSON canonicalization. Excludes data because it depends on cache warmth — the partner's cold cache eagerly computes deps that the primary's warm cache short-circuits, producing false positives in sampled mode. Documented the trade-off in SNAPSHOT-CONTRACT.md. --- .../SNAPSHOT-CONTRACT.md | 43 ++++++++----- .../workspace-component/loader-diff/diff.ts | 44 +++++++------ .../loader-diff/snapshot.ts | 61 ++++++++++++++++++- 3 files changed, 112 insertions(+), 36 deletions(-) diff --git a/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md index 10863878fd6e..ab6c0538b80e 100644 --- a/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md +++ b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md @@ -18,28 +18,41 @@ something depends on it is not — so err on the side of starting minimal. - Timestamp fields are excluded. - File contents are excluded — too large, and orthogonal to the loader's job. -## Fields (v0 — minimal) +## Fields (current) -| Field | Source | Notes | -| -------------- | ------------------------------------- | ------------------------------------------- | -| `id` | `component.id.toString()` | Full id including scope and version | -| `head` | `component.head?.toString() ?? null` | Scope HEAD hash, or null for new components | -| `tags` | sorted versions from `component.tags` | Tag versions present in scope | -| `extensionIds` | sorted aspect ids from `state.config` | Just the ids first; data added in v1 | +| Field | Source | Notes | +| -------------- | ------------------------------------------------------ | -------------------------------------------- | +| `id` | `component.id.toString()` | Full id including scope and version | +| `head` | `component.head?.hash ?? null` | Scope HEAD hash, or null for new components | +| `tags` | sorted versions from `component.tags` | Tag versions present in scope | +| `extensionIds` | sorted aspect ids from `state.config.extensions` | Just the ids | +| `extensions` | `state.config.extensions`, sorted by id, `config` only | Pre-slot static configuration | +| `aspects` | `state.aspects.entries`, sorted by id, `config` only | Post-slot aspects (configs only — see below) | -## Fields (v1 — once v0 is stable on V1-vs-V1) +### Why `data` is excluded -| Field | Source | Notes | -| ------------ | ---------------------------------------------------- | ------------------------------------------- | -| `extensions` | `state.config.extensions`, sorted by id, with `data` | Full extension payloads, JSON-stable | -| `envId` | env descriptor id | Resolved env, e.g. `teambit.harmony/aspect` | -| `envType` | env descriptor type | | -| `aspects` | `state.aspects` entries, sorted by id, with `config` | Post-`onComponentLoad` slot state | +`data` is the mutable post-load state populated by `onComponentLoad` slots, +the dep resolver, and other side effects during loading. It depends on cache +warmth: a cold-cache load eagerly computes resolved dependencies, a warm-cache +load short-circuits. The harness's primary and partner have different cache +states when sampling is enabled (the partner only runs on sampled calls, so +its cache lags the primary's). Comparing `data` produces noisy false positives. -## Fields (v2 — once v1 is stable) +`config` is the static configuration supplied via `workspace.jsonc` and the +component's own config — stable across cache states. That's what we compare. + +Trade-off: changes to how `data` is computed by the rewrite won't be caught +by the snapshot. Catching those will require either (a) a comparison +mechanism that runs both loaders cold-cache for each call, or (b) fixing +V1's cache-state-dependent eager-vs-lazy behavior so warm and cold loads +produce identical `data`. Both are post-rewrite concerns. + +## Fields (planned — post-rewrite) | Field | Source | Notes | | -------------- | ------------------------------------- | ------------------------------------------------ | +| `envId` | env descriptor id | Resolved env, e.g. `teambit.harmony/aspect` | +| `envType` | env descriptor type | | | `dependencies` | from `dep-resolver` data on component | Sorted by package name; capture version + scope | | `isModified` | `component.isModified()` | The single most failure-prone consumer of loader | diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/diff.ts b/scopes/workspace/workspace/workspace-component/loader-diff/diff.ts index cb69c5d6867d..9d6afe647f38 100644 --- a/scopes/workspace/workspace/workspace-component/loader-diff/diff.ts +++ b/scopes/workspace/workspace/workspace-component/loader-diff/diff.ts @@ -1,7 +1,7 @@ -import type { NormalizedSnapshot } from './snapshot'; +import type { AspectSnapshot, NormalizedSnapshot } from './snapshot'; export interface FieldDiff { - field: keyof NormalizedSnapshot | `${keyof NormalizedSnapshot}[${number}]`; + field: string; primary: unknown; partner: unknown; } @@ -13,33 +13,21 @@ export interface SnapshotDiff { /** * Diff a single pair of snapshots. Returns null if identical. - * - * Both snapshots are produced by serializeComponentForDiff and are already - * normalized (sorted arrays, stable key order), so a structural compare is - * sufficient. */ export function diffSnapshots(primary: NormalizedSnapshot, partner: NormalizedSnapshot): SnapshotDiff | null { const fields: FieldDiff[] = []; - if (primary.id !== partner.id) { - fields.push({ field: 'id', primary: primary.id, partner: partner.id }); - } - if (primary.head !== partner.head) { - fields.push({ field: 'head', primary: primary.head, partner: partner.head }); - } + if (primary.id !== partner.id) fields.push({ field: 'id', primary: primary.id, partner: partner.id }); + if (primary.head !== partner.head) fields.push({ field: 'head', primary: primary.head, partner: partner.head }); diffStringArray('tags', primary.tags, partner.tags, fields); diffStringArray('extensionIds', primary.extensionIds, partner.extensionIds, fields); + diffAspectArray('extensions', primary.extensions, partner.extensions, fields); + diffAspectArray('aspects', primary.aspects, partner.aspects, fields); if (fields.length === 0) return null; return { id: primary.id, fields }; } -/** - * Diff a result set: lists of snapshots from primary and partner. - * - * Components present on one side but not the other are reported as a - * "missing" diff with the full snapshot in the present-side field. - */ export interface ResultDiff { missingFromPartner: NormalizedSnapshot[]; missingFromPrimary: NormalizedSnapshot[]; @@ -77,8 +65,26 @@ export function isResultDiffEmpty(diff: ResultDiff): boolean { ); } -function diffStringArray(field: keyof NormalizedSnapshot, a: string[], b: string[], out: FieldDiff[]): void { +function diffStringArray(field: string, a: string[], b: string[], out: FieldDiff[]): void { if (a.length !== b.length || a.some((v, i) => v !== b[i])) { out.push({ field, primary: a, partner: b }); } } + +function diffAspectArray(field: string, a: AspectSnapshot[], b: AspectSnapshot[], out: FieldDiff[]): void { + if (a.length !== b.length) { + out.push({ field, primary: a.map((x) => x.id), partner: b.map((x) => x.id) }); + return; + } + for (let i = 0; i < a.length; i++) { + const ax = a[i]; + const bx = b[i]; + if (ax.id !== bx.id) { + out.push({ field: `${field}[${i}].id`, primary: ax.id, partner: bx.id }); + continue; + } + if (ax.config !== bx.config) { + out.push({ field: `${field}[${ax.id}].config`, primary: ax.config, partner: bx.config }); + } + } +} diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts b/scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts index c96b5a0db964..15d7a4d2cae4 100644 --- a/scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts +++ b/scopes/workspace/workspace/workspace-component/loader-diff/snapshot.ts @@ -7,22 +7,40 @@ import type { Component } from '@teambit/component'; * * Anything in the snapshot must match between two loader implementations. * Anything not in the snapshot, the new loader is free to change. - * - * Start minimal. Add fields as we discover what matters. */ export interface NormalizedSnapshot { id: string; head: string | null; tags: string[]; extensionIds: string[]; + /** Pre-slot config (`state.config.extensions`), sorted by id, with config + data. */ + extensions: AspectSnapshot[]; + /** Post-slot aspects (`state.aspects.entries`), sorted by id, with config + data. */ + aspects: AspectSnapshot[]; } +export interface AspectSnapshot { + id: string; + /** JSON-canonical form of the entry's config. */ + config: string; +} + +// `data` (the mutable post-load state populated by aspect slots and dep +// resolution) is intentionally NOT included. In V1, the contents of `data` +// depend on cache warmth — a cold-cache load eagerly computes dependencies, +// a warm-cache load short-circuits. The harness's primary and partner will +// have different cache states when sampling is enabled, so comparing `data` +// produces noisy false positives. `config` is stable across cache states. +// See SNAPSHOT-CONTRACT.md. + export function serializeComponentForDiff(component: Component): NormalizedSnapshot { return { id: component.id.toString(), head: component.head?.hash ?? null, tags: collectTags(component), extensionIds: collectExtensionIds(component), + extensions: collectExtensions(component), + aspects: collectAspects(component), }; } @@ -43,3 +61,42 @@ function collectExtensionIds(component: Component): string[] { } return ids.sort(); } + +function collectExtensions(component: Component): AspectSnapshot[] { + const extensions = component.state?.config?.extensions; + if (!extensions) return []; + const out: AspectSnapshot[] = []; + for (const entry of extensions) { + if (!entry.stringId) continue; + out.push({ id: entry.stringId, config: canonicalJson(entry.config) }); + } + return out.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)); +} + +function collectAspects(component: Component): AspectSnapshot[] { + const aspects = component.state?.aspects; + if (!aspects) return []; + const out: AspectSnapshot[] = []; + for (const entry of aspects.entries) { + out.push({ id: entry.id.toString(), config: canonicalJson(entry.config) }); + } + return out.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)); +} + +/** + * Stable JSON serialization: sorts object keys at every level so two + * structurally-equal objects produce the same string regardless of insertion order. + * Non-serializable values (functions, undefined, BigInt, etc.) are dropped or coerced + * by JSON.stringify the same way on both sides — fine for V1-vs-V1 diffing. + */ +function canonicalJson(value: unknown): string { + if (value === undefined || value === null) return 'null'; + return JSON.stringify(value, (_key, val) => { + if (val && typeof val === 'object' && !Array.isArray(val)) { + const sorted: Record = {}; + for (const k of Object.keys(val).sort()) sorted[k] = (val as Record)[k]; + return sorted; + } + return val; + }); +} From e6dadb63faf20822522f7955034b002ddc96bb11 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 16:21:50 -0400 Subject: [PATCH 09/17] refactor(workspace): extract buildLoadGroups as a pure function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pulls the canonical group-ordering logic out of WorkspaceComponentLoader as buildLoadPlanGroups in load-plan.ts. The loader retains the side-effecting parts (warming the extensions cache iteratively, registering newly-discovered extension comp ids) and calls into the pure function for the actual ordering. Removes the now-redundant regroupEnvsIdsFromTheList / regroupExtIdsFromTheList wrappers — load-plan.ts calls groupEnvsByDepLayer / groupExtsByDepLayer directly. --- .../workspace-component/load-plan.spec.ts | 109 +++++++++++ .../workspace-component/load-plan.ts | 143 ++++++++++++++ .../workspace-component-loader.ts | 177 +++++------------- 3 files changed, 296 insertions(+), 133 deletions(-) create mode 100644 scopes/workspace/workspace/workspace-component/load-plan.spec.ts create mode 100644 scopes/workspace/workspace/workspace-component/load-plan.ts diff --git a/scopes/workspace/workspace/workspace-component/load-plan.spec.ts b/scopes/workspace/workspace/workspace-component/load-plan.spec.ts new file mode 100644 index 000000000000..8078f8783e6d --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/load-plan.spec.ts @@ -0,0 +1,109 @@ +import { expect } from 'chai'; +import { ComponentID } from '@teambit/component-id'; +import { buildLoadPlanGroups } from './load-plan'; +import type { LoadPlanInput } from './load-plan'; + +function id(idStr: string): ComponentID { + return ComponentID.fromString(idStr); +} + +interface MockComp { + id: ComponentID; + isCoreEnv?: boolean; + envId?: string; + extensions?: Array<{ stringId: string; newExtensionId?: ComponentID }>; +} + +function inputFor(workspaceComps: MockComp[], scopeComps: MockComp[] = []): LoadPlanInput { + const all = [...workspaceComps, ...scopeComps]; + const byKey = new Map(); + for (const c of all) { + byKey.set(c.id.toString(), c); + byKey.set(c.id.toStringWithoutVersion(), c); + } + return { + workspaceIds: workspaceComps.map((c) => c.id), + scopeIds: scopeComps.map((c) => c.id), + isCoreEnv: (cid) => byKey.get(cid.toString())?.isCoreEnv ?? false, + extensionsOf: (cid) => byKey.get(cid.toString())?.extensions ?? [], + envIdOf: (cid) => byKey.get(cid.toString())?.envId, + }; +} + +describe('buildLoadPlanGroups', () => { + it('returns no groups when input is empty', () => { + const result = buildLoadPlanGroups(inputFor([])); + expect(result.groups).to.deep.equal([]); + expect(result.extraExtensionIds).to.deep.equal([]); + }); + + it('puts core envs in the first group', () => { + const coreEnv = { id: id('teambit.harmony/aspect@1.0.0'), isCoreEnv: true }; + const regular = { id: id('scope/comp1@1.0.0') }; + const result = buildLoadPlanGroups(inputFor([coreEnv, regular])); + expect(result.groups[0].core).to.equal(true); + expect(result.groups[0].ids.map((c) => c.toStringWithoutVersion())).to.deep.equal(['teambit.harmony/aspect']); + }); + + it('places regular components (no env, no ext-of-others) in the last group', () => { + const comp = { id: id('scope/regular@1.0.0') }; + const result = buildLoadPlanGroups(inputFor([comp])); + expect(result.groups).to.have.lengthOf(1); + const last = result.groups[result.groups.length - 1]; + expect(last.envs).to.equal(false); + expect(last.aspects).to.equal(false); + }); + + it("layers a workspace component's env before the component", () => { + const env = { id: id('scope/my-env@1.0.0') }; + const comp = { id: id('scope/my-comp@1.0.0'), envId: env.id.toStringWithoutVersion() }; + // The component lists my-env in its extensions so it gets discovered as an ext component. + comp.extensions = [{ stringId: env.id.toString(), newExtensionId: env.id }]; + const result = buildLoadPlanGroups(inputFor([env, comp])); + + const envIdx = result.groups.findIndex((g) => g.envs && g.ids.some((c) => c.toString() === env.id.toString())); + const compIdx = result.groups.findIndex((g) => g.ids.some((c) => c.toString() === comp.id.toString())); + expect(envIdx).to.be.greaterThan(-1); + expect(compIdx).to.be.greaterThan(-1); + expect(envIdx).to.be.lessThan(compIdx); + }); + + it('reports extra extension ids that are referenced but not in the input', () => { + const externalExt = id('external-scope/ext@2.0.0'); + const comp = { + id: id('scope/comp1@1.0.0'), + extensions: [{ stringId: externalExt.toString(), newExtensionId: externalExt }], + }; + const result = buildLoadPlanGroups(inputFor([comp])); + expect(result.extraExtensionIds.map((c) => c.toStringWithoutVersion())).to.deep.equal(['external-scope/ext']); + }); + + it('layers a chain of extensions deepest-first', () => { + const extC = { id: id('scope/ext-c@1.0.0') }; + const extB = { + id: id('scope/ext-b@1.0.0'), + extensions: [{ stringId: extC.id.toString(), newExtensionId: extC.id }], + }; + const extA = { + id: id('scope/ext-a@1.0.0'), + extensions: [{ stringId: extB.id.toString(), newExtensionId: extB.id }], + }; + // The "user" component pulls extA in. extA pulls extB. extB pulls extC. All should + // end up in the load plan as extension components, in order [C], [B], [A]. + const userComp = { + id: id('scope/user@1.0.0'), + extensions: [{ stringId: extA.id.toString(), newExtensionId: extA.id }], + }; + const result = buildLoadPlanGroups(inputFor([userComp, extA, extB, extC])); + + // Find the layered ext groups (envs=false, aspects=true). + const extGroups = result.groups.filter((g) => g.aspects && !g.envs && !g.core); + const orderedExts = extGroups.map((g) => g.ids.map((c) => c.toStringWithoutVersion())); + expect(orderedExts).to.deep.equal([['scope/ext-c'], ['scope/ext-b'], ['scope/ext-a']]); + }); + + it('drops empty groups from the result', () => { + const result = buildLoadPlanGroups(inputFor([{ id: id('scope/regular@1.0.0') }])); + for (const g of result.groups) expect(g.ids.length).to.be.greaterThan(0); + }); +}); diff --git a/scopes/workspace/workspace/workspace-component/load-plan.ts b/scopes/workspace/workspace/workspace-component/load-plan.ts new file mode 100644 index 000000000000..3b38b4dc4054 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/load-plan.ts @@ -0,0 +1,143 @@ +import type { ComponentID } from '@teambit/component-id'; +import { groupEnvsByDepLayer } from './env-dag-sort'; +import { groupExtsByDepLayer } from './dep-dag-sort'; + +/** + * What the pure load-plan builder needs to know about a component. + * The caller is responsible for populating this view of the world (typically + * by warming caches before calling `buildLoadPlanGroups`). + */ +export interface LoadPlanInput { + /** Components present in the workspace, in the order the caller wants them processed. */ + workspaceIds: ComponentID[]; + /** Components present only in scope. */ + scopeIds: ComponentID[]; + /** Returns true for components that are core envs (no loading required). */ + isCoreEnv: (id: ComponentID) => boolean; + /** + * The extensions list configured on `id` — `stringId` is the aspect string id + * (with version, as written in config); `newExtensionId` is the resolved + * ComponentID of that extension's component, when known. + */ + extensionsOf: (id: ComponentID) => Array<{ stringId: string; newExtensionId?: ComponentID }>; + /** The env id (with version) configured on `id`, if any. */ + envIdOf: (id: ComponentID) => string | undefined; +} + +export interface RawLoadGroup { + ids: ComponentID[]; + core: boolean; + aspects: boolean; + seeders: boolean; + envs: boolean; +} + +export interface BuildLoadPlanResult { + /** Groups in load order, with empty groups already filtered out. Caller still needs to split ids by ws/scope. */ + groups: RawLoadGroup[]; + /** + * Extension component IDs discovered while building the plan that weren't part + * of the original input. The caller must register them (typically via + * `groupAndUpdateIds`) before applying the ws/scope split, so the split + * sees their up-to-date workspace presence. + */ + extraExtensionIds: ComponentID[]; +} + +/** + * Pure function that produces the load groups in canonical order: + * + * 1. Core envs (always first — no loading needed). + * 2. Layered envs of workspace components (deepest first via env-DAG sort). + * 3. Extra extension components not in the input (one flat group). + * 4. Layered extensions of components in the input (deepest first via ext-DAG sort). + * 5. Regular components (non-env, non-ext) — load last. + * + * Empty groups are filtered. The caller is responsible for splitting each + * group's `ids` into workspaceIds/scopeIds, since that classification depends + * on workspace state that this function shouldn't reach into. + */ +export function buildLoadPlanGroups(input: LoadPlanInput): BuildLoadPlanResult { + const { workspaceIds, scopeIds, isCoreEnv, extensionsOf, envIdOf } = input; + const allIds = [...workspaceIds, ...scopeIds]; + + // Step 1: separate core envs from everything else. + const coreEnvs: ComponentID[] = []; + const nonCoreEnvs: ComponentID[] = []; + for (const id of allIds) { + if (isCoreEnv(id)) coreEnvs.push(id); + else nonCoreEnvs.push(id); + } + + // Step 2: collect all extension component ids referenced by non-core-envs (deduped). + const allExtIds = new Map(); + for (const id of nonCoreEnvs) { + for (const ext of extensionsOf(id)) { + if (ext.newExtensionId && !allExtIds.has(ext.stringId)) { + allExtIds.set(ext.stringId, ext.newExtensionId); + } + } + } + const allExtCompIds = Array.from(allExtIds.values()); + + // Step 3: identify envs of workspace components. + const envsIdsOfWsComps = new Set(); + for (const id of workspaceIds) { + const envId = envIdOf(id); + if (envId) envsIdsOfWsComps.add(envId); + } + + // Step 4: split extension comp ids by whether they're an env-of-ws-comp. + const extsThatAreEnvsOfWsComps: ComponentID[] = []; + const extsThatArent: ComponentID[] = []; + for (const id of allExtCompIds) { + const idStr = id.toString(); + const withoutVersion = idStr.split('@')[0]; + if (envsIdsOfWsComps.has(idStr) || envsIdsOfWsComps.has(withoutVersion)) { + extsThatAreEnvsOfWsComps.push(id); + } else { + extsThatArent.push(id); + } + } + const notEnvOfWsCompsStrs = new Set(extsThatArent.map((id) => id.toString())); + + // Step 5: from the original non-core-env inputs, identify which are extensions of others in the list. + const extsFromTheList: ComponentID[] = []; + const nonExtNonEnvComps: ComponentID[] = []; + for (const id of nonCoreEnvs) { + if (notEnvOfWsCompsStrs.has(id.toString())) extsFromTheList.push(id); + else nonExtNonEnvComps.push(id); + } + + // Step 6: identify extension ids that aren't already in the input list — caller must add them. + const extsFromTheListStrs = new Set(extsFromTheList.map((id) => id.toString())); + const extraExtensionIds: ComponentID[] = []; + for (const id of allExtIds.values()) { + if (!extsFromTheListStrs.has(id.toString())) extraExtensionIds.push(id); + } + + // Step 7: layer envs and extensions topologically (deepest first). + const layeredEnvs = groupEnvsByDepLayer(extsThatAreEnvsOfWsComps, (id) => envIdOf(id), envsIdsOfWsComps); + const layeredExts = groupExtsByDepLayer(extsFromTheList, (id) => { + const out: string[] = []; + for (const ext of extensionsOf(id)) { + out.push(ext.stringId); + if (ext.newExtensionId) out.push(ext.newExtensionId.toStringWithoutVersion()); + } + return out; + }); + + // Step 8: assemble groups in canonical order. + const rawGroups: RawLoadGroup[] = [ + { ids: coreEnvs, core: true, aspects: true, seeders: true, envs: true }, + ...layeredEnvs.map((ids) => ({ ids, core: false, aspects: true, seeders: true, envs: true })), + { ids: extraExtensionIds, core: false, aspects: true, seeders: false, envs: false }, + ...layeredExts.map((ids) => ({ ids, core: false, aspects: true, seeders: true, envs: false })), + { ids: nonExtNonEnvComps, core: false, aspects: false, seeders: true, envs: false }, + ]; + + return { + groups: rawGroups.filter((g) => g.ids.length > 0), + extraExtensionIds, + }; +} diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 3002df75a046..f43d4357d371 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -7,7 +7,7 @@ import { ComponentFS, Config, State, TagMap } from '@teambit/component'; import type { ComponentID } from '@teambit/component-id'; import { ComponentIdList } from '@teambit/component-id'; import mapSeries from 'p-map-series'; -import { compact, fromPairs, groupBy, pick, uniq, uniqBy } from 'lodash'; +import { compact, fromPairs, pick, uniq, uniqBy } from 'lodash'; import type { ComponentLoadOptions as LegacyComponentLoadOptions } from '@teambit/legacy.consumer-component'; import { ComponentNotFoundInPath, ConsumerComponent, Dependencies } from '@teambit/legacy.consumer-component'; import { MissingBitMapComponent } from '@teambit/legacy.bit-map'; @@ -25,8 +25,8 @@ import type { AspectLoaderMain } from '@teambit/aspect-loader'; import type { Workspace } from '../workspace'; import { WorkspaceComponent } from './workspace-component'; import { MergeConfigConflict } from '../exceptions/merge-config-conflict'; -import { groupEnvsByDepLayer } from './env-dag-sort'; -import { groupExtsByDepLayer } from './dep-dag-sort'; +import { buildLoadPlanGroups } from './load-plan'; +import type { LoadPlanInput } from './load-plan'; type GetManyRes = { components: Component[]; @@ -220,151 +220,62 @@ export class WorkspaceComponentLoader { private async buildLoadGroups(workspaceScopeIdsMap: WorkspaceScopeIdsMap): Promise> { const wsIds = Array.from(workspaceScopeIdsMap.workspaceIds.values()); const scopeIds = Array.from(workspaceScopeIdsMap.scopeIds.values()); + + // Phase 1 (side-effecting): warm the extensions cache for everything that needs to + // factor into the plan. This is iterative — we have to populate non-core-envs first + // to discover what extension components they reference, then populate those too. const allIds = [...wsIds, ...scopeIds]; - const groupedByIsCoreEnvs = groupBy(allIds, (id) => { - return this.envs.isCoreEnv(id.toStringWithoutVersion()); - }); - const nonCoreEnvs = groupedByIsCoreEnvs.false || []; + const nonCoreEnvs = allIds.filter((id) => !this.envs.isCoreEnv(id.toStringWithoutVersion())); await this.populateScopeAndExtensionsCache(nonCoreEnvs, workspaceScopeIdsMap); - const allExtIds: Map = new Map(); - nonCoreEnvs.forEach((id) => { - const idStr = id.toString(); - const fromCache = this.componentsExtensionsCache.get(idStr); - if (!fromCache || !fromCache.extensions) { - return; - } - fromCache.extensions.forEach((ext) => { + + const allExtIds = new Map(); + for (const id of nonCoreEnvs) { + const fromCache = this.componentsExtensionsCache.get(id.toString()); + if (!fromCache?.extensions) continue; + for (const ext of fromCache.extensions) { if (!allExtIds.has(ext.stringId) && ext.newExtensionId) { allExtIds.set(ext.stringId, ext.newExtensionId); } - }); - }); - const allExtCompIds = Array.from(allExtIds.values()); - await this.populateScopeAndExtensionsCache(allExtCompIds || [], workspaceScopeIdsMap); - - // const allExtIdsStr = allExtCompIds.map((id) => id.toString()); - - const envsIdsOfWsComps = new Set(); - wsIds.forEach((id) => { - const idStr = id.toString(); - const fromCache = this.componentsExtensionsCache.get(idStr); - if (!fromCache || !fromCache.envId) { - return; - } - const envId = fromCache.envId; - if (envId) { - envsIdsOfWsComps.add(envId); - } - }); - - const groupedByIsEnvOfWsComps = groupBy(allExtCompIds, (id) => { - const idStr = id.toString(); - const withoutVersion = idStr.split('@')[0]; - return envsIdsOfWsComps.has(idStr) || envsIdsOfWsComps.has(withoutVersion); - }); - const notEnvOfWsCompsStrs = (groupedByIsEnvOfWsComps.false || []).map((id) => id.toString()); - - const groupedByIsExtOfAnother = groupBy(nonCoreEnvs, (id) => { - return notEnvOfWsCompsStrs.includes(id.toString()); - }); - const extIdsFromTheList = (groupedByIsExtOfAnother.true || []).map((id) => id.toString()); - const extsNotFromTheList: ComponentID[] = []; - for (const [, id] of allExtIds.entries()) { - if (!extIdsFromTheList.includes(id.toString())) { - extsNotFromTheList.push(id); } } + await this.populateScopeAndExtensionsCache(Array.from(allExtIds.values()), workspaceScopeIdsMap); - await this.groupAndUpdateIds(extsNotFromTheList, workspaceScopeIdsMap); - - const layeredExtFromTheList = this.regroupExtIdsFromTheList(groupedByIsExtOfAnother.true); - const layeredExtGroups = layeredExtFromTheList.map((ids) => { - return { - ids, - core: false, - aspects: true, - seeders: true, - envs: false, - }; - }); - - const layeredEnvsFromTheList = this.regroupEnvsIdsFromTheList(groupedByIsEnvOfWsComps.true, envsIdsOfWsComps); - const layeredEnvsGroups = layeredEnvsFromTheList.map((ids) => { - return { - ids, - core: false, - aspects: true, - seeders: true, - envs: true, - }; - }); - - const groupsToHandle = [ - // Always load first core envs - { ids: groupedByIsCoreEnvs.true || [], core: true, aspects: true, seeders: true, envs: true }, - // The recursive env-DAG sort in `groupEnvsByDepLayer` places envs at the - // bottom of the chain (e.g. `core-aspect-env`/`core-aspect-env-jest`) in the - // earliest layer naturally. The hardcoded `core-aspect-env` prepending that - // used to live here is redundant under the recursive sort — see D-001. - ...layeredEnvsGroups, - { ids: extsNotFromTheList || [], core: false, aspects: true, seeders: false, envs: false }, - ...layeredExtGroups, - { ids: groupedByIsExtOfAnother.false || [], core: false, aspects: false, seeders: true, envs: false }, - ]; - - const groupsByWsScope = groupsToHandle.map((group) => { - if (!group.ids?.length) return undefined; - const groupedByWsScope = groupBy(group.ids, (id) => { - return workspaceScopeIdsMap.workspaceIds.has(id.toString()); - }); + // Phase 2 (pure): build the canonical group structure using cached state. + const planInput: LoadPlanInput = { + workspaceIds: wsIds, + scopeIds, + isCoreEnv: (id) => this.envs.isCoreEnv(id.toStringWithoutVersion()), + extensionsOf: (id) => { + const fromCache = this.componentsExtensionsCache.get(id.toString()); + if (!fromCache?.extensions) return []; + return Array.from(fromCache.extensions).map((ext) => ({ + stringId: ext.stringId, + newExtensionId: ext.newExtensionId, + })); + }, + envIdOf: (id) => this.componentsExtensionsCache.get(id.toString())?.envId, + }; + const { groups: rawGroups, extraExtensionIds } = buildLoadPlanGroups(planInput); + + // Phase 3 (side-effecting): register extension comp ids that weren't in the input, + // then split each group's ids by ws/scope using the up-to-date map. + await this.groupAndUpdateIds(extraExtensionIds, workspaceScopeIdsMap); + return rawGroups.map((group) => { + const wsIdsInGroup: ComponentID[] = []; + const scopeIdsInGroup: ComponentID[] = []; + for (const id of group.ids) { + if (workspaceScopeIdsMap.workspaceIds.has(id.toString())) wsIdsInGroup.push(id); + else scopeIdsInGroup.push(id); + } return { - workspaceIds: groupedByWsScope.true || [], - scopeIds: groupedByWsScope.false || [], + workspaceIds: wsIdsInGroup, + scopeIds: scopeIdsInGroup, core: group.core, aspects: group.aspects, seeders: group.seeders, envs: group.envs, }; }); - return compact(groupsByWsScope); - } - - /** - * Layer envs so that envs-of-envs load before their dependents. - * Thin wrapper over `groupEnvsByDepLayer` — see that function for the algorithm. - */ - private regroupEnvsIdsFromTheList(envIds: ComponentID[] = [], envsIdsOfWsComps: Set): Array { - return groupEnvsByDepLayer( - envIds, - (envId) => { - const fromCache = this.componentsExtensionsCache.get(envId.toString()); - if (!fromCache || !fromCache.extensions) return undefined; - return fromCache.envId; - }, - envsIdsOfWsComps - ); - } - - /** - * Layer extension IDs so that extensions-of-extensions load before their dependents. - * Thin wrapper over `groupExtsByDepLayer` — see that function for the algorithm. - * - * Resolves an extension's own extensions via `componentsExtensionsCache`, which - * `populateScopeAndExtensionsCache` populates for all extensions in the load list. - * Returns extension ids in both with-version and without-version forms; the - * topo-sort matches either against the input list. - */ - private regroupExtIdsFromTheList(ids: ComponentID[] = []): Array { - return groupExtsByDepLayer(ids, (extId) => { - const fromCache = this.componentsExtensionsCache.get(extId.toString()); - if (!fromCache?.extensions) return []; - const depKeys: string[] = []; - for (const ext of fromCache.extensions) { - if (ext.stringId) depKeys.push(ext.stringId); - if (ext.newExtensionId) depKeys.push(ext.newExtensionId.toStringWithoutVersion()); - } - return depKeys; - }); } private async getAndLoadSlot( From 35767111c5983df4b4bd77b40a94656189b6d7d3 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 16:38:11 -0400 Subject: [PATCH 10/17] refactor(workspace): extract id classifier (workspace vs scope) as pure function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pulls the workspace-vs-scope partitioning logic out of groupAndUpdateIds as classifyIds in discovery.ts, with 7 unit tests covering ws/scope mixes, version resolution, and append-to-existing-result. Side effect: the workspace's id surface (listIds + locallyDeletedIds) is now fetched once per call instead of once per id — V1 fanned this out via Promise.all but the result was always identical for every parallel call within one invocation, so caching upfront is behavior-preserving. --- .../workspace-component/discovery.spec.ts | 84 +++++++++++++++++++ .../workspace-component/discovery.ts | 48 +++++++++++ .../workspace-component/load-plan.spec.ts | 9 +- .../workspace-component-loader.ts | 32 ++++--- 4 files changed, 152 insertions(+), 21 deletions(-) create mode 100644 scopes/workspace/workspace/workspace-component/discovery.spec.ts create mode 100644 scopes/workspace/workspace/workspace-component/discovery.ts diff --git a/scopes/workspace/workspace/workspace-component/discovery.spec.ts b/scopes/workspace/workspace/workspace-component/discovery.spec.ts new file mode 100644 index 000000000000..d8fb0dc0c805 --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/discovery.spec.ts @@ -0,0 +1,84 @@ +import { expect } from 'chai'; +import { ComponentID } from '@teambit/component-id'; +import { classifyIds } from './discovery'; + +function id(idStr: string): ComponentID { + return ComponentID.fromString(idStr); +} + +describe('classifyIds', () => { + it('returns empty maps for empty input', () => { + const result = classifyIds([], { knownWorkspaceIds: [], resolveWorkspaceVersion: (i) => i }); + expect(result.workspaceIds.size).to.equal(0); + expect(result.scopeIds.size).to.equal(0); + }); + + it('classifies an id present in the workspace as a workspace id', () => { + const wsId = id('scope/comp1@1.0.0'); + const result = classifyIds([wsId], { + knownWorkspaceIds: [wsId], + resolveWorkspaceVersion: (i) => i, + }); + expect(result.workspaceIds.size).to.equal(1); + expect(result.workspaceIds.get(wsId.toString())?.toString()).to.equal(wsId.toString()); + expect(result.scopeIds.size).to.equal(0); + }); + + it('classifies an id absent from the workspace as a scope id', () => { + const scopeOnly = id('scope/comp1@1.0.0'); + const result = classifyIds([scopeOnly], { + knownWorkspaceIds: [], + resolveWorkspaceVersion: (i) => i, + }); + expect(result.scopeIds.size).to.equal(1); + expect(result.scopeIds.get(scopeOnly.toString())?.toString()).to.equal(scopeOnly.toString()); + expect(result.workspaceIds.size).to.equal(0); + }); + + it('matches workspace presence ignoring version when input has no version', () => { + const inputId = id('scope/comp1'); + const wsId = id('scope/comp1@1.0.0'); + const result = classifyIds([inputId], { + knownWorkspaceIds: [wsId], + resolveWorkspaceVersion: () => wsId, + }); + expect(result.workspaceIds.size).to.equal(1); + expect(result.workspaceIds.get(wsId.toString())?.toString()).to.equal(wsId.toString()); + }); + + it('uses the resolved version as the workspaceIds map key, not the input id', () => { + const inputId = id('scope/comp1'); + const wsId = id('scope/comp1@2.5.0'); + const result = classifyIds([inputId], { + knownWorkspaceIds: [wsId], + resolveWorkspaceVersion: () => wsId, + }); + expect(Array.from(result.workspaceIds.keys())).to.deep.equal([wsId.toString()]); + }); + + it('appends to an existing result map when one is provided', () => { + const first = id('scope/a@1.0.0'); + const second = id('scope/b@1.0.0'); + const initial = classifyIds([first], { + knownWorkspaceIds: [first], + resolveWorkspaceVersion: (i) => i, + }); + const after = classifyIds([second], { knownWorkspaceIds: [], resolveWorkspaceVersion: (i) => i }, initial); + expect(after).to.equal(initial); + expect(after.workspaceIds.has(first.toString())).to.equal(true); + expect(after.scopeIds.has(second.toString())).to.equal(true); + }); + + it('handles a mix of workspace and scope ids in one call', () => { + const wsComp = id('scope/in-ws@1.0.0'); + const scopeComp = id('scope/scope-only@2.0.0'); + const result = classifyIds([wsComp, scopeComp], { + knownWorkspaceIds: [wsComp], + resolveWorkspaceVersion: (i) => i, + }); + expect(result.workspaceIds.size).to.equal(1); + expect(result.scopeIds.size).to.equal(1); + expect(result.workspaceIds.has(wsComp.toString())).to.equal(true); + expect(result.scopeIds.has(scopeComp.toString())).to.equal(true); + }); +}); diff --git a/scopes/workspace/workspace/workspace-component/discovery.ts b/scopes/workspace/workspace/workspace-component/discovery.ts new file mode 100644 index 000000000000..774dcbe2f59d --- /dev/null +++ b/scopes/workspace/workspace/workspace-component/discovery.ts @@ -0,0 +1,48 @@ +import type { ComponentID } from '@teambit/component-id'; + +/** + * Result of discovery: input ComponentIDs partitioned into "workspace components" + * (present in the bitmap, with versions resolved) and "scope components" (everything else). + * + * Keys for `workspaceIds` are the *resolved* id string (with version filled in from + * the bitmap). Keys for `scopeIds` are the input id string as-given. The asymmetry + * is preserved from V1 — workspace ids need version resolution because `bit show comp1` + * doesn't include a version, while scope ids are typically already versioned. + */ +export interface DiscoveredIds { + workspaceIds: Map; + scopeIds: Map; +} + +export interface DiscoveryInput { + /** + * All workspace component ids — including locally-deleted ones, since users may + * still want to load and inspect them. + */ + knownWorkspaceIds: ComponentID[]; + /** + * Resolve a (possibly versionless) workspace ComponentID to its full versioned form + * using bitmap state. Pure with respect to the bitmap snapshot at the time of the call. + */ + resolveWorkspaceVersion: (id: ComponentID) => ComponentID; +} + +/** + * Pure classifier: partition input ids into workspace vs scope buckets, resolving + * workspace versions from the bitmap. If `existing` is passed, results are appended + * to it (preserving V1's "append to map" behavior used during plan-building when + * extra extension ids are discovered after the initial pass). + */ +export function classifyIds(ids: ComponentID[], input: DiscoveryInput, existing?: DiscoveredIds): DiscoveredIds { + const result = existing ?? { workspaceIds: new Map(), scopeIds: new Map() }; + for (const id of ids) { + const inWs = input.knownWorkspaceIds.find((wsId) => wsId.isEqual(id, { ignoreVersion: !id.hasVersion() })); + if (!inWs) { + result.scopeIds.set(id.toString(), id); + continue; + } + const resolved = input.resolveWorkspaceVersion(id); + result.workspaceIds.set(resolved.toString(), resolved); + } + return result; +} diff --git a/scopes/workspace/workspace/workspace-component/load-plan.spec.ts b/scopes/workspace/workspace/workspace-component/load-plan.spec.ts index 8078f8783e6d..e7be89c0a599 100644 --- a/scopes/workspace/workspace/workspace-component/load-plan.spec.ts +++ b/scopes/workspace/workspace/workspace-component/load-plan.spec.ts @@ -55,10 +55,13 @@ describe('buildLoadPlanGroups', () => { }); it("layers a workspace component's env before the component", () => { - const env = { id: id('scope/my-env@1.0.0') }; - const comp = { id: id('scope/my-comp@1.0.0'), envId: env.id.toStringWithoutVersion() }; + const env: MockComp = { id: id('scope/my-env@1.0.0') }; // The component lists my-env in its extensions so it gets discovered as an ext component. - comp.extensions = [{ stringId: env.id.toString(), newExtensionId: env.id }]; + const comp: MockComp = { + id: id('scope/my-comp@1.0.0'), + envId: env.id.toStringWithoutVersion(), + extensions: [{ stringId: env.id.toString(), newExtensionId: env.id }], + }; const result = buildLoadPlanGroups(inputFor([env, comp])); const envIdx = result.groups.findIndex((g) => g.envs && g.ids.some((c) => c.toString() === env.id.toString())); diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index f43d4357d371..d26e3a5035ac 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -27,6 +27,8 @@ import { WorkspaceComponent } from './workspace-component'; import { MergeConfigConflict } from '../exceptions/merge-config-conflict'; import { buildLoadPlanGroups } from './load-plan'; import type { LoadPlanInput } from './load-plan'; +import { classifyIds } from './discovery'; +import type { DiscoveredIds } from './discovery'; type GetManyRes = { components: Component[]; @@ -422,25 +424,19 @@ export class WorkspaceComponentLoader { ids: ComponentID[], existingGroups?: WorkspaceScopeIdsMap ): Promise { - const result: WorkspaceScopeIdsMap = existingGroups || { - scopeIds: new Map(), - workspaceIds: new Map(), - }; - - await Promise.all( - ids.map(async (componentId) => { - const inWs = await this.isInWsIncludeDeleted(componentId); - - if (!inWs) { - result.scopeIds.set(componentId.toString(), componentId); - return undefined; - } - const resolvedVersions = this.resolveVersion(componentId); - result.workspaceIds.set(resolvedVersions.toString(), resolvedVersions); - return undefined; - }) + // Fetch the workspace's id surface once. Under V1 each call into + // isInWsIncludeDeleted re-fetched locallyDeletedIds() per component; the result + // is identical for every call within one invocation, so caching here is a behavior- + // preserving optimization (and makes the inner classifier pure/synchronous). + const knownWorkspaceIds = this.workspace.listIds().concat(await this.workspace.locallyDeletedIds()); + return classifyIds( + ids, + { + knownWorkspaceIds, + resolveWorkspaceVersion: (id) => this.resolveVersion(id), + }, + existingGroups as DiscoveredIds | undefined ); - return result; } private async isInWsIncludeDeleted(componentId: ComponentID): Promise { From 851813b951b52a231e9fea36007838a94bd56d2f Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 16:47:00 -0400 Subject: [PATCH 11/17] docs(loader-rewrite): document cache invariants in D-002 + inline comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original RFC called the loader's caching "the caching nightmare". After tracing the actual behavior, the rules are coherent — they were just undocumented. D-002 enumerates the four caches, the cache-key composition (four flags that genuinely produce a different Component), and the "exact-match-or-fully-loaded" lookup rule. Inline comments at the saveInCache / getFromCache / createComponentCacheKey touchpoints point back to D-002 so a reader of the source can find the invariants. No behavior change. --- .../component-loading-rewrite/DECISIONS.md | 123 ++++++++++++++++++ .../workspace-component-loader.ts | 80 ++++++++---- 2 files changed, 181 insertions(+), 22 deletions(-) diff --git a/docs/rfcs/component-loading-rewrite/DECISIONS.md b/docs/rfcs/component-loading-rewrite/DECISIONS.md index 324207010083..62821ed5afed 100644 --- a/docs/rfcs/component-loading-rewrite/DECISIONS.md +++ b/docs/rfcs/component-loading-rewrite/DECISIONS.md @@ -5,6 +5,129 @@ why, and what evidence shaped it. New decisions go at the top. --- +## D-002: The cache layout is defensible — write the invariants down before changing them + +**Date:** 2026-05-07 +**Status:** Accepted (no code change beyond inline comments) + +### Context + +The original RFC named "the caching nightmare" as one of the top complexity +hotspots. It listed: + +- 12 boolean flags affect loading, but the cache key uses only 4 +- Cache lookup tries the given options first, then falls back to a hardcoded + `{ loadExtensions: true, executeLoadSlot: true }` +- Multiple overlapping caches with no unified invalidation + +After tracing the actual behavior, the rules are coherent — they're just +undocumented. Recording them here so future changes can preserve them +deliberately rather than re-discover them. + +### The four caches + +| Cache | Stores | Key | Populated by | +| ------------------------------ | -------------------------------- | -------------------------- | ------------------------------------- | +| `componentsCache` | Fully-loaded `Component` objects | `${id}:${json(load-opts)}` | `saveInCache` after `getMany` / `get` | +| `scopeComponentsCache` | Scope-only `Component` objects | `id.toString()` | `populateScopeAndExtensionsCache` | +| `componentsExtensionsCache` | `{ extensions, errors, envId }` | `id.toString()` | `populateScopeAndExtensionsCache` | +| `componentLoadedSelfAsAspects` | A boolean memoization flag | `id.toString()` | `loadCompsAsAspects` | + +`scopeComponentsCache` and `componentsExtensionsCache` are intermediate +caches used during the load pipeline (`buildLoadGroups` warms them, the +plan-builder reads them). They're not really "caches of the loader's output" +— they're scratch state for one load operation. + +`componentLoadedSelfAsAspects` is a memoization flag, not a cache of values. + +The only output cache is `componentsCache`. + +### The cache key for `componentsCache` + +`createComponentCacheKey(id, loadOpts)` picks four boolean flags from +`loadOpts`: `loadExtensions`, `executeLoadSlot`, `loadDocs`, +`loadCompositions`. Each of the four genuinely produces a distinguishable +Component: + +- `loadExtensions: false` → `loadComponentsExtensions` is skipped, so + external aspects aren't registered with Harmony for this load. The + Component object itself ends up similar but the side effects on the + Harmony aspect graph differ. +- `executeLoadSlot: false` → the `onComponentLoad` slot subscribers don't + fire, so `state.aspects` doesn't accumulate post-slot upserts. +- `loadDocs: false` → the docs aspect's slot subscriber early-returns + (`docs.main.runtime.ts:220`), so the docs aspect's `data` is empty. +- `loadCompositions: false` → same pattern in the compositions aspect + (`compositions.main.runtime.ts:141`). + +Other flags from `ComponentLoadOptions` (`storeInCache`, +`storeDepsInFsCache`, `originatedFromHarmony`, etc.) don't change the +Component value — they affect whether the loader caches, where it stores fs +state, etc. Including them in the key would split the cache without value. + +### The "exact-match-or-fully-loaded" lookup rule + +`getFromCache(id, loadOpts)` looks up two keys: + +1. The exact key for the requested options. +2. A "fully loaded" key built from `{ loadExtensions: true, executeLoadSlot: true }`. + +If either hits, the cached Component is returned. The second lookup makes +sure that once a component has been "fully loaded" by some prior call, every +subsequent call benefits from the cache — even if it asks for a less-loaded +shape. This is sound because: + +- A "fully loaded" Component contains a _superset_ of what a less-loaded + Component would. The requesting caller gets at least what they asked for. +- The expensive work (slot fires, extension registration) was already paid + for; we don't redo it. + +The cache is _not_ a per-flag-combination cache; it's a "best-shape we've +got" cache with a labeling convention. + +### Why `getMany` saves with hardcoded `{ loadExtensions: true, executeLoadSlot: true }` + +`getMany` defaults `loadExtensions: false, executeLoadSlot: false` for the +_initial_ `consumer.loadComponents` call — that's a perf optimization, not +the final state. Inside `getAndLoadSlot` the loader unconditionally runs +`executeLoadSlot` (line 315) and conditionally runs `loadComponentsExtensions` +(when `loadOpts.loadExtensions` is true). So by the time `saveInCache` runs, +each component is "as loaded as the caller asked for, plus the slot fired". + +Storing with `{ loadExtensions: true, executeLoadSlot: true }` reflects the +fact that the slot did fire and (when applicable) extensions were loaded — +even though the _initial_ call had those flags false. This is what makes the +exact-match-or-fully-loaded rule work in practice: the cache key tracks the +post-load state, not the pre-load options. + +### Implications for the rewrite + +- Don't try to "simplify" the cache by removing the four-flag key. Each flag + produces a real difference in `state.aspects` post-slot. +- Don't try to remove the fully-loaded fallback. Without it, `getMany` + results never re-hit on subsequent calls with different default flags, + and the cache becomes useless for the most common code paths. +- Do consider whether `componentLoadedSelfAsAspects` could be folded into a + metadata field on the Component itself, so it's not a separate cache to + invalidate. Not urgent. +- `scopeComponentsCache` and `componentsExtensionsCache` are scratch state + for one load operation. In the rewrite they should live on the load-plan + context, not on the loader instance. + +### Evidence index + +| Claim | File:Line | +| -------------------------------------------------- | ------------------------------------------------ | +| 4 caches in the loader | `workspace-component-loader.ts:94, 98, 103, 109` | +| Cache key picks 4 flags | `workspace-component-loader.ts:878` | +| `loadDocs` consumed by docs aspect | `docs.main.runtime.ts:220` | +| `loadCompositions` consumed by compositions aspect | `compositions.main.runtime.ts:141` | +| Fully-loaded fallback in lookup | `workspace-component-loader.ts:753-754` | +| `getMany` saves with hardcoded fully-loaded key | `workspace-component-loader.ts:168` | +| `executeLoadSlot` runs unconditionally | `workspace-component-loader.ts:315` | + +--- + ## D-001: The env↔component recursion is a topological-ordering problem, not a cycle **Date:** 2026-05-07 diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index d26e3a5035ac..ed5c11a0b67e 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -91,22 +91,31 @@ export type LoadCompAsAspectsOptions = { }; export class WorkspaceComponentLoader { - private componentsCache: InMemoryCache; // cache loaded components - /** - * Cache components that loaded from scope (especially for get many for perf improvements) - */ + // Loader caches — see D-002 in docs/rfcs/component-loading-rewrite/DECISIONS.md + // for the full invariants. Briefly: + // + // componentsCache — final loaded Component objects. Key is + // `${id}:${json(load-opts)}`. Lookup follows + // the "exact-match-or-fully-loaded" rule + // (see getFromCache). + // scopeComponentsCache — scratch state for one load operation: + // the scope-only Component for an id. + // Read by populateScopeAndExtensionsCache + // and load-plan.ts via the loader's lookups. + // componentsExtensionsCache — scratch state for one load operation: + // the merged extensions / envId for an id, + // produced before the full load runs. + // componentLoadedSelfAsAspects — memoization flag (not a value cache): + // ensures each component is registered as + // an aspect at most once. + private componentsCache: InMemoryCache; private scopeComponentsCache: InMemoryCache; - /** - * Cache extension list for components. used by get many for perf improvements. - * And to make sure we load extensions first. - */ private componentsExtensionsCache: InMemoryCache<{ extensions: ExtensionDataList; errors: Error[] | undefined; envId: string | undefined; }>; - - private componentLoadedSelfAsAspects: InMemoryCache; // cache loaded components + private componentLoadedSelfAsAspects: InMemoryCache; constructor( private workspace: Workspace, private logger: Logger, @@ -163,7 +172,11 @@ export class WorkspaceComponentLoader { return comp.id.toString(); }); - // this.logger.clearStatusLine(); + // Save under the "fully loaded" key even though loadOptsWithDefaults may have + // had loadExtensions/executeLoadSlot set to false. By this point in getMany + // the slot has fired (executeLoadSlot runs unconditionally inside getAndLoadSlot) + // and extensions were loaded when requested, so the post-load shape is + // fully-loaded. See D-002 for why the key tracks post-load state. components.forEach((comp) => { this.saveInCache(comp, { loadExtensions: true, executeLoadSlot: true }); }); @@ -730,28 +743,36 @@ export class WorkspaceComponentLoader { return this.executeLoadSlot(newComponent, loadOpts); } + /** + * Cache the loaded Component under a key derived from `loadOpts`. Callers + * typically pass `{ loadExtensions: true, executeLoadSlot: true }` even when + * the *initial* load options had those false — by the time the loader + * reaches save, the slot has fired and (when applicable) extensions have + * been registered, so the post-load state is "fully loaded". See D-002. + */ private saveInCache(component: Component, loadOpts?: ComponentLoadOptions): void { const cacheKey = createComponentCacheKey(component.id, loadOpts); this.componentsCache.set(cacheKey, component); } /** - * make sure that not only the id-str match, but also the legacy-id. - * this is needed because the ComponentID.toString() is the same whether or not the legacy-id has - * scope-name, as it includes the defaultScope if the scope is empty. - * as a result, when out-of-sync is happening and the id is changed to include scope-name in the - * legacy-id, the component is the cache has the old id. + * Lookup follows the "exact-match-or-fully-loaded" rule (D-002): try the + * caller's exact load-opts shape first; on miss, fall back to the + * `{ loadExtensions: true, executeLoadSlot: true }` shape that getMany / + * get use when saving. A fully-loaded Component is a superset of any + * less-loaded one, so the caller gets at least what it asked for. + * + * Also enforces an id-equality check (not just a string match) — when a + * component was loaded out-of-sync the canonical id-string is the same + * but the legacy id may have a different scope. We don't want to return + * the stale cached version in that case. */ private getFromCache(componentId: ComponentID, loadOpts?: ComponentLoadOptions): Component | undefined { const bitIdWithVersion: ComponentID = this.resolveVersion(componentId); const id = bitIdWithVersion.version ? componentId.changeVersion(bitIdWithVersion.version) : componentId; const cacheKey = createComponentCacheKey(id, loadOpts); - // If we try to look for the cache without load extensions/ without execute load slot - // but there is an entry after the load, we want to use it as well. - // as we want the component, so if we already loaded it with everything, it's fine. - // this sometime relevant for cases with tiny cache size (during tag) - const cacheKeyWithTrueLoadOpts = createComponentCacheKey(id, { loadExtensions: true, executeLoadSlot: true }); - const fromCache = this.componentsCache.get(cacheKey) || this.componentsCache.get(cacheKeyWithTrueLoadOpts); + const cacheKeyForFullyLoaded = createComponentCacheKey(id, { loadExtensions: true, executeLoadSlot: true }); + const fromCache = this.componentsCache.get(cacheKey) || this.componentsCache.get(cacheKeyForFullyLoaded); if (fromCache && fromCache.id.isEqual(id)) { return fromCache; } @@ -874,6 +895,21 @@ export class WorkspaceComponentLoader { } } +/** + * Cache key composition for `componentsCache`. Includes only the four flags + * that genuinely produce a different Component: + * + * - loadExtensions: false → external aspects aren't registered with Harmony + * - executeLoadSlot: false → onComponentLoad slots don't fire, so state.aspects + * doesn't accumulate post-slot upserts + * - loadDocs: false → docs aspect's slot subscriber early-returns + * (docs.main.runtime.ts:220), data is empty + * - loadCompositions: false → compositions aspect's slot subscriber + * early-returns (compositions.main.runtime.ts:141) + * + * Other ComponentLoadOptions (storeInCache, originatedFromHarmony, etc.) don't + * change the Component value and would split the cache without value if included. + */ function createComponentCacheKey(id: ComponentID, loadOpts?: ComponentLoadOptions): string { const relevantOpts = pick(loadOpts, ['loadExtensions', 'executeLoadSlot', 'loadDocs', 'loadCompositions']); return `${id.toString()}:${JSON.stringify(sortKeys(relevantOpts ?? {}))}`; From ede940c315f51a12ccb937612a410e1ffda95cd2 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 16:48:10 -0400 Subject: [PATCH 12/17] docs(workspace): add file-level overview comment to WorkspaceComponentLoader --- .../workspace-component-loader.ts | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index ed5c11a0b67e..0010d9e3d24b 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -1,3 +1,36 @@ +/** + * WorkspaceComponentLoader — loads components from the workspace. + * + * Public surface: + * getMany(ids) — load many components, with batching and shared phases + * get(id) — load one component + * getIfExist(id) — load one, return undefined on not-found + * getInvalid(ids) — return components that fail to load + * clearCache(), + * clearComponentCache(id) + * + * Internal call shape (getMany): + * + * getMany + * ├─ getFromCache (per id) [components cache] + * └─ getAndLoadSlotOrdered (for cache misses) + * ├─ groupAndUpdateIds → classifyIds [discovery.ts, pure] + * ├─ buildLoadGroups + * │ ├─ populateScopeAndExtensionsCache [warms scratch caches] + * │ └─ buildLoadPlanGroups [load-plan.ts, pure] + * │ ├─ groupEnvsByDepLayer [env-dag-sort.ts, pure] + * │ └─ groupExtsByDepLayer [dep-dag-sort.ts, pure] + * └─ getAndLoadSlot (per group, in order) + * ├─ getComponentsWithoutLoadExtensions + * │ └─ consumer.loadComponents → loadOne + * ├─ loadComponentsExtensions [optional] + * ├─ executeLoadSlot [onComponentLoad subscribers] + * └─ loadCompsAsAspects [register as Harmony aspects] + * + * Caching: see D-002 in docs/rfcs/component-loading-rewrite/DECISIONS.md. + * Recursion / load-order: see D-001 in the same file. + */ + import pMap from 'p-map'; import { getLatestVersionNumber } from '@teambit/legacy.utils'; import { pMapPool } from '@teambit/toolbox.promise.map-pool'; From fabc0344a87079e44cbac39b55906a3c348839ec Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 17:07:57 -0400 Subject: [PATCH 13/17] feat(workspace): collapse loader-diff env vars into one Single env var: BIT_LOADER_DIFF=N. Unset/0 = off. 1 = on, every call. N>1 = sample every Nth call. Drops BIT_LOADER_DIFF_SAMPLE (folded into BIT_LOADER_DIFF) and the user-facing BIT_LOADER_DIFF_OUT (kept internal-only for e2e tests). The harness prints the log path to stderr at startup so the user can find the output without consulting docs. --- .../SNAPSHOT-CONTRACT.md | 23 ++++++++++---- e2e/harmony/loader-diff-harness.e2e.ts | 5 ++- .../loader-diff/harness.ts | 29 ++++++++--------- .../workspace-component/loader-diff/index.ts | 31 ++++++++----------- scopes/workspace/workspace/workspace.ts | 14 ++++----- 5 files changed, 53 insertions(+), 49 deletions(-) diff --git a/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md index ab6c0538b80e..3fc38ad96ee5 100644 --- a/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md +++ b/docs/rfcs/component-loading-rewrite/SNAPSHOT-CONTRACT.md @@ -90,10 +90,21 @@ that's a real gap — either the command is using a different loader path (`consumer.loadComponents` directly, scope-only loading, etc.) or the wrapping in `Workspace`'s constructor missed something. File it as a follow-up. -## Sample rate (`BIT_LOADER_DIFF_SAMPLE`) +## How to run it (`BIT_LOADER_DIFF`) -On large workspaces (bit7 itself, ~300 components), running both loaders for -every call doubles cache footprint and can OOM Node's default 4GB heap. Set -`BIT_LOADER_DIFF_SAMPLE=10` to run the partner only every 10th call. The -trade-off: sampling can miss regressions that only manifest on the -non-sampled calls. Use the lowest sample rate the workspace can afford. +One env var, one number: + +- `BIT_LOADER_DIFF=1 bit status` — compare on every loader call. Use this on + small workspaces. +- `BIT_LOADER_DIFF=50 bit status` — compare every 50th call. Use this on + large workspaces (bit7 itself, ~300 components) where running both loaders + on every call doubles cache footprint and can OOM Node's default 4GB heap. +- unset / 0 → off (default). + +The harness prints the log path to stderr at startup, e.g. +`[loader-diff] sample 1/50 → /var/folders/.../bit-loader-diff-12345.jsonl`. +The first line of the file is a header; every other line is a divergence. +A clean V1-vs-V1 baseline produces only the header. + +Trade-off with `N > 1`: sampling can miss regressions that only manifest on +non-sampled calls. Use the lowest N the workspace can afford. diff --git a/e2e/harmony/loader-diff-harness.e2e.ts b/e2e/harmony/loader-diff-harness.e2e.ts index 1e36fe206bd3..1a36dc3e5ea5 100644 --- a/e2e/harmony/loader-diff-harness.e2e.ts +++ b/e2e/harmony/loader-diff-harness.e2e.ts @@ -99,12 +99,11 @@ describe('loader diff harness — V1-vs-V1 baseline', function () { }); it('bit status on a modified component produces zero diffs (sampled)', () => { - // BIT_LOADER_DIFF_SAMPLE=50 — workspaces with scope state make many loader + // BIT_LOADER_DIFF=50 — workspaces with scope state make many loader // calls per command; sampling caps the partner's footprint. helper.command.runCmd(`bit status`, helper.scopes.localPath, 'pipe', undefined, false, { - BIT_LOADER_DIFF: '1', + BIT_LOADER_DIFF: '50', BIT_LOADER_DIFF_OUT: logPath, - BIT_LOADER_DIFF_SAMPLE: '50', }); const lines = readDiffLog(logPath); expect( diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts b/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts index 9dd79d3d9462..187261533ad9 100644 --- a/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts +++ b/scopes/workspace/workspace/workspace-component/loader-diff/harness.ts @@ -18,20 +18,21 @@ type GetManyRes = { export type LoaderFactory = () => WorkspaceComponentLoader; export interface LoaderDiffHarnessOptions { - /** Where to write the JSONL diff log. Defaults to `/bit-loader-diff.jsonl`. */ - outputPath?: string; - /** Tag written into each log line, e.g. "v1-vs-v1" or "v1-vs-v2". */ - comparisonLabel: string; /** - * Sample rate. If `N > 1`, only every Nth call runs the partner loader. - * Reduces overhead on large workspaces where running both loaders for every - * call would OOM Node's default heap. Default: 1 (every call). + * Sample rate. `1` runs the partner on every loader call. `N > 1` runs it + * every Nth call — use this on large workspaces where running both loaders + * for every call would OOM Node's default heap. + */ + sampleEvery: number; + /** + * Override the default output path. Used by e2e tests that need isolated + * logs per test. Not user-facing. */ - sampleEvery?: number; + outputPath?: string; } /** - * Wraps a primary loader and a partner loader, runs both on every call, + * Wraps a primary loader and a partner loader, runs both on every (sampled) call, * and writes any diffs to a JSONL log. The primary's result is returned — * the partner is observation-only. * @@ -43,7 +44,6 @@ export interface LoaderDiffHarnessOptions { export class LoaderDiffHarness { private partner: WorkspaceComponentLoader | null = null; private readonly outputPath: string; - private readonly comparisonLabel: string; private readonly sampleEvery: number; private callIndex = 0; private writeFailureLogged = false; @@ -54,10 +54,11 @@ export class LoaderDiffHarness { private logger: Logger, options: LoaderDiffHarnessOptions ) { - this.outputPath = options.outputPath ?? path.join(os.tmpdir(), 'bit-loader-diff.jsonl'); - this.comparisonLabel = options.comparisonLabel; - this.sampleEvery = Math.max(1, Math.floor(options.sampleEvery ?? 1)); + this.outputPath = options.outputPath ?? path.join(os.tmpdir(), `bit-loader-diff-${process.pid}.jsonl`); + this.sampleEvery = Math.max(1, Math.floor(options.sampleEvery)); this.writeHeader(); + // eslint-disable-next-line no-console + console.error(`[loader-diff] sample 1/${this.sampleEvery} → ${this.outputPath}`); } async getMany(ids: ComponentID[], loadOpts?: ComponentLoadOptions, throwOnFailure = true): Promise { @@ -154,7 +155,7 @@ export class LoaderDiffHarness { private writeHeader(): void { this.writeLine({ header: true, - comparisonLabel: this.comparisonLabel, + sampleEvery: this.sampleEvery, startedAt: new Date().toISOString(), pid: process.pid, cwd: process.cwd(), diff --git a/scopes/workspace/workspace/workspace-component/loader-diff/index.ts b/scopes/workspace/workspace/workspace-component/loader-diff/index.ts index b51a2dbda5ce..c12d191d3eee 100644 --- a/scopes/workspace/workspace/workspace-component/loader-diff/index.ts +++ b/scopes/workspace/workspace/workspace-component/loader-diff/index.ts @@ -6,25 +6,20 @@ export { LoaderDiffHarness } from './harness'; export type { LoaderFactory, LoaderDiffHarnessOptions } from './harness'; /** - * Returns the comparison label if the diff harness is enabled, otherwise null. - * `BIT_LOADER_DIFF=1` (or `=v1-vs-v1`) → V1-vs-V1 baseline mode. - * Any other truthy value is used as the label as-is. + * Read the diff-harness configuration from `BIT_LOADER_DIFF`. One env var, one number: + * + * unset / 0 → off + * 1 → on, compare every loader call + * N (>1) → on, compare every Nth call (use this on big workspaces; running + * both loaders on every call doubles memory and can OOM Node's + * default heap) + * + * Returns null when off, otherwise the sample rate. */ -export function loaderDiffMode(): string | null { +export function loaderDiffSampleEvery(): number | null { const raw = process.env.BIT_LOADER_DIFF; - if (!raw || raw === '0' || raw.toLowerCase() === 'false') return null; - if (raw === '1' || raw.toLowerCase() === 'true') return 'v1-vs-v1'; - return raw; -} - -/** - * Returns the sample rate from `BIT_LOADER_DIFF_SAMPLE`. Default 1 (every call). - * Use larger values on workspaces big enough that running both loaders for every - * call would OOM, e.g. `BIT_LOADER_DIFF_SAMPLE=10`. - */ -export function loaderDiffSampleEvery(): number { - const raw = process.env.BIT_LOADER_DIFF_SAMPLE; - if (!raw) return 1; + if (!raw) return null; const parsed = Number.parseInt(raw, 10); - return Number.isFinite(parsed) && parsed >= 1 ? parsed : 1; + if (!Number.isFinite(parsed) || parsed <= 0) return null; + return parsed; } diff --git a/scopes/workspace/workspace/workspace.ts b/scopes/workspace/workspace/workspace.ts index 4b68f7b17880..af698849caae 100644 --- a/scopes/workspace/workspace/workspace.ts +++ b/scopes/workspace/workspace/workspace.ts @@ -112,7 +112,7 @@ import { CompFiles } from './workspace-component/comp-files'; import { Filter } from './filter'; import type { ComponentStatusLegacy, ComponentStatusResult } from './workspace-component/component-status-loader'; import { ComponentStatusLoader } from './workspace-component/component-status-loader'; -import { LoaderDiffHarness, loaderDiffMode, loaderDiffSampleEvery } from './workspace-component/loader-diff'; +import { LoaderDiffHarness, loaderDiffSampleEvery } from './workspace-component/loader-diff'; import execa from 'execa'; import { getGitExecutablePath } from '@teambit/git.modules.git-executable'; import { VERSION_ZERO } from '@teambit/objects'; @@ -267,17 +267,15 @@ export class Workspace implements ComponentFactory { ) { this.componentLoadedSelfAsAspects = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); const primaryLoader = new WorkspaceComponentLoader(this, logger, dependencyResolver, envs, aspectLoader); - const diffMode = loaderDiffMode(); - if (diffMode) { + const sampleEvery = loaderDiffSampleEvery(); + if (sampleEvery) { this.componentLoader = new LoaderDiffHarness( primaryLoader, () => new WorkspaceComponentLoader(this, logger, dependencyResolver, envs, aspectLoader), logger, - { - comparisonLabel: diffMode, - outputPath: process.env.BIT_LOADER_DIFF_OUT, - sampleEvery: loaderDiffSampleEvery(), - } + // BIT_LOADER_DIFF_OUT is an internal escape hatch for e2e tests that + // need an isolated log path. Users should not need to set it. + { sampleEvery, outputPath: process.env.BIT_LOADER_DIFF_OUT } ) as unknown as WorkspaceComponentLoader; } else { this.componentLoader = primaryLoader; From cba7d8bb447e96dd36726bc86481286d05df4fb7 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 17:23:25 -0400 Subject: [PATCH 14/17] refactor(workspace): drop redundant componentLoadedSelfAsAspects cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flag tracked "we attempted to load this as an aspect" to suppress retry of failures. AspectLoaderMain.isAspectLoaded already returns true for both successfully-loaded aspects (via harmony.get) and previously-failed ones (via the failedAspects registry populated in requireAspects). The local memoization flag was redundant — removed, along with its references in clearCache, clearComponentCache, the constructor, and loadCompsAsAspects. --- .../workspace-component-loader.ts | 60 ++++++++----------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 0010d9e3d24b..0c39830cbe41 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -138,9 +138,11 @@ export class WorkspaceComponentLoader { // componentsExtensionsCache — scratch state for one load operation: // the merged extensions / envId for an id, // produced before the full load runs. - // componentLoadedSelfAsAspects — memoization flag (not a value cache): - // ensures each component is registered as - // an aspect at most once. + // + // Aspect-load retry suppression is delegated to AspectLoaderMain — its + // `isAspectLoaded(id)` returns true for both successfully-loaded and + // previously-failed aspects (`failedAspects` registry), so we don't need a + // local memoization flag of our own. private componentsCache: InMemoryCache; private scopeComponentsCache: InMemoryCache; private componentsExtensionsCache: InMemoryCache<{ @@ -148,7 +150,6 @@ export class WorkspaceComponentLoader { errors: Error[] | undefined; envId: string | undefined; }>; - private componentLoadedSelfAsAspects: InMemoryCache; constructor( private workspace: Workspace, private logger: Logger, @@ -159,7 +160,6 @@ export class WorkspaceComponentLoader { this.componentsCache = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); this.scopeComponentsCache = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); this.componentsExtensionsCache = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); - this.componentLoadedSelfAsAspects = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); } async getMany(ids: Array, loadOpts?: ComponentLoadOptions, throwOnFailure = true): Promise { @@ -391,33 +391,29 @@ export class WorkspaceComponentLoader { components: Component[], opts: LoadCompAsAspectsOptions = { loadApps: true, loadEnvs: true, loadAspects: true } ): Promise { + // Skip components we shouldn't load as aspects, plus those that are + // already loaded or previously failed (both reported via + // `aspectLoader.isAspectLoaded`, which returns true for failures so we + // don't retry). For everything else, decide whether the component qualifies + // as an app, env, or aspect based on its registered data. const aspectIds: string[] = []; - components.forEach((component) => { - const firstTimeToLoad = this.componentLoadedSelfAsAspects.get(component.id.toString()) === undefined; - const excluded = opts.idsToNotLoadAsAspects?.includes(component.id.toString()); - const isCore = this.aspectLoader.isCoreAspect(component.id.toStringWithoutVersion()); - const alreadyLoaded = this.aspectLoader.isAspectLoaded(component.id.toString()); - const skipLoading = excluded || isCore || alreadyLoaded || !firstTimeToLoad; - - if (skipLoading) { - return; - } + for (const component of components) { const idStr = component.id.toString(); - const appData = component.state.aspects.get('teambit.harmony/application'); - if (opts.loadApps && appData?.data?.appName) { - aspectIds.push(idStr); - this.componentLoadedSelfAsAspects.set(idStr, true); + if ( + opts.idsToNotLoadAsAspects?.includes(idStr) || + this.aspectLoader.isCoreAspect(component.id.toStringWithoutVersion()) || + this.aspectLoader.isAspectLoaded(idStr) + ) { + continue; } + const appData = component.state.aspects.get('teambit.harmony/application'); const envsData = component.state.aspects.get(EnvsAspect.id); - if (opts.loadEnvs && (envsData?.data?.services || envsData?.data?.self || envsData?.data?.type === 'env')) { - aspectIds.push(idStr); - this.componentLoadedSelfAsAspects.set(idStr, true); - } - if (opts.loadAspects && envsData?.data?.type === 'aspect') { - aspectIds.push(idStr); - this.componentLoadedSelfAsAspects.set(idStr, true); - } - }); + const isApp = opts.loadApps && appData?.data?.appName; + const isEnv = + opts.loadEnvs && (envsData?.data?.services || envsData?.data?.self || envsData?.data?.type === 'env'); + const isAspect = opts.loadAspects && envsData?.data?.type === 'aspect'; + if (isApp || isEnv || isAspect) aspectIds.push(idStr); + } if (!aspectIds.length) return; try { @@ -700,17 +696,11 @@ export class WorkspaceComponentLoader { this.componentsCache.deleteAll(); this.scopeComponentsCache.deleteAll(); this.componentsExtensionsCache.deleteAll(); - this.componentLoadedSelfAsAspects.deleteAll(); } clearComponentCache(id: ComponentID) { const idStr = id.toString(); - const cachesToClear = [ - this.componentsCache, - this.scopeComponentsCache, - this.componentsExtensionsCache, - this.componentLoadedSelfAsAspects, - ]; + const cachesToClear = [this.componentsCache, this.scopeComponentsCache, this.componentsExtensionsCache]; cachesToClear.forEach((cache) => { for (const cacheKey of cache.keys()) { if (cacheKey === idStr || cacheKey.startsWith(`${idStr}:`)) { From 516d942c5f7792107519fef734ee1567b599f33c Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 19:55:25 -0400 Subject: [PATCH 15/17] refactor(workspace): move per-operation scratch state off the loader instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two of the loader's caches (scopeComponentsCache, componentsExtensionsCache) were used as scratch state for one load operation but lived as instance fields, accumulating across calls. Now they're a per-operation LoadScratch value created at the top of getAndLoadSlotOrdered and threaded through buildLoadGroups → getAndLoadSlot → getComponentsWithoutLoadExtensions → this.get → loadOne. Reads fall back to direct workspace lookups when scratch is absent (single-component get() path), so the public surface is unchanged. Net effect on the class: Before: 4 instance caches, instance state outliving any single operation. After: 1 instance cache (componentsCache, the actual output cache). clearCache touches one thing. Scratch lifetime is visible at every call site instead of hidden in instance fields. --- .../workspace-component-loader.ts | 185 ++++++++++-------- 1 file changed, 99 insertions(+), 86 deletions(-) diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 0c39830cbe41..2a5e626548b3 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -95,6 +95,23 @@ type WorkspaceScopeIdsMap = { workspaceIds: Map; }; +/** + * Per-operation scratch state shared between the discovery, plan-building, and + * load phases of one top-level load. Lives only as long as one `getMany` call; + * goes away when the operation ends. (Previously two of these were instance + * fields and accumulated across calls — see D-002.) + */ +type LoadScratch = { + /** id.toString() → scope-only Component (used by loadOne to populate `head`/`tags`). */ + scope: Map; + /** id.toString() → pre-resolved extensions list, errors, envId. */ + extensions: Map; +}; + +function emptyScratch(): LoadScratch { + return { scope: new Map(), extensions: new Map() }; +} + export type LoadCompAsAspectsOptions = { /** * In case the component we are loading is app, whether to load it as app (in a scope aspects capsule) @@ -124,32 +141,19 @@ export type LoadCompAsAspectsOptions = { }; export class WorkspaceComponentLoader { - // Loader caches — see D-002 in docs/rfcs/component-loading-rewrite/DECISIONS.md - // for the full invariants. Briefly: + // The only cache the loader holds across calls is `componentsCache` — final + // loaded Component objects, keyed by `${id}:${json(load-opts)}`, looked up + // via the "exact-match-or-fully-loaded" rule (see getFromCache + D-002). // - // componentsCache — final loaded Component objects. Key is - // `${id}:${json(load-opts)}`. Lookup follows - // the "exact-match-or-fully-loaded" rule - // (see getFromCache). - // scopeComponentsCache — scratch state for one load operation: - // the scope-only Component for an id. - // Read by populateScopeAndExtensionsCache - // and load-plan.ts via the loader's lookups. - // componentsExtensionsCache — scratch state for one load operation: - // the merged extensions / envId for an id, - // produced before the full load runs. + // Per-operation scratch state (the scope-only Component map and the + // merged-extensions map) lives in a `LoadScratch` value passed through the + // load pipeline — not on this instance. // // Aspect-load retry suppression is delegated to AspectLoaderMain — its // `isAspectLoaded(id)` returns true for both successfully-loaded and // previously-failed aspects (`failedAspects` registry), so we don't need a // local memoization flag of our own. private componentsCache: InMemoryCache; - private scopeComponentsCache: InMemoryCache; - private componentsExtensionsCache: InMemoryCache<{ - extensions: ExtensionDataList; - errors: Error[] | undefined; - envId: string | undefined; - }>; constructor( private workspace: Workspace, private logger: Logger, @@ -158,8 +162,6 @@ export class WorkspaceComponentLoader { private aspectLoader: AspectLoaderMain ) { this.componentsCache = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); - this.scopeComponentsCache = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); - this.componentsExtensionsCache = createInMemoryCache({ maxSize: getMaxSizeForComponents() }); } async getMany(ids: Array, loadOpts?: ComponentLoadOptions, throwOnFailure = true): Promise { @@ -230,9 +232,11 @@ export class WorkspaceComponentLoader { ): Promise { if (!ids?.length) return { components: [], invalidComponents: [] }; + // One scratch for this whole top-level operation; goes away when we return. + const scratch = emptyScratch(); const workspaceScopeIdsMap: WorkspaceScopeIdsMap = await this.groupAndUpdateIds(ids); this.logger.profileTrace('buildLoadGroups'); - const groupsToHandle = await this.buildLoadGroups(workspaceScopeIdsMap); + const groupsToHandle = await this.buildLoadGroups(workspaceScopeIdsMap, scratch); this.logger.profileTrace('buildLoadGroups'); // prefix your command with "BIT_LOG=*" to see the detailed groups if (process.env.BIT_LOG) { @@ -246,7 +250,12 @@ export class WorkspaceComponentLoader { if (!workspaceIds.length && !scopeIds.length) { throw new Error('getAndLoadSlotOrdered - group has no ids to load'); } - const res = await this.getAndLoadSlot(workspaceIds, scopeIds, { ...loadOpts, core, seeders, aspects, envs }); + const res = await this.getAndLoadSlot( + workspaceIds, + scopeIds, + { ...loadOpts, core, seeders, aspects, envs }, + scratch + ); this.logger.profileTrace(groupDesc); // We don't want to return components that were not asked originally (we do want to load them) if (!group.seeders) return undefined; @@ -265,43 +274,46 @@ export class WorkspaceComponentLoader { return finalRes; } - private async buildLoadGroups(workspaceScopeIdsMap: WorkspaceScopeIdsMap): Promise> { + private async buildLoadGroups( + workspaceScopeIdsMap: WorkspaceScopeIdsMap, + scratch: LoadScratch + ): Promise> { const wsIds = Array.from(workspaceScopeIdsMap.workspaceIds.values()); const scopeIds = Array.from(workspaceScopeIdsMap.scopeIds.values()); - // Phase 1 (side-effecting): warm the extensions cache for everything that needs to + // Phase 1 (side-effecting): warm scratch state for everything that needs to // factor into the plan. This is iterative — we have to populate non-core-envs first // to discover what extension components they reference, then populate those too. const allIds = [...wsIds, ...scopeIds]; const nonCoreEnvs = allIds.filter((id) => !this.envs.isCoreEnv(id.toStringWithoutVersion())); - await this.populateScopeAndExtensionsCache(nonCoreEnvs, workspaceScopeIdsMap); + await this.populateScopeAndExtensionsCache(nonCoreEnvs, workspaceScopeIdsMap, scratch); const allExtIds = new Map(); for (const id of nonCoreEnvs) { - const fromCache = this.componentsExtensionsCache.get(id.toString()); - if (!fromCache?.extensions) continue; - for (const ext of fromCache.extensions) { + const fromScratch = scratch.extensions.get(id.toString()); + if (!fromScratch?.extensions) continue; + for (const ext of fromScratch.extensions) { if (!allExtIds.has(ext.stringId) && ext.newExtensionId) { allExtIds.set(ext.stringId, ext.newExtensionId); } } } - await this.populateScopeAndExtensionsCache(Array.from(allExtIds.values()), workspaceScopeIdsMap); + await this.populateScopeAndExtensionsCache(Array.from(allExtIds.values()), workspaceScopeIdsMap, scratch); - // Phase 2 (pure): build the canonical group structure using cached state. + // Phase 2 (pure): build the canonical group structure using scratch state. const planInput: LoadPlanInput = { workspaceIds: wsIds, scopeIds, isCoreEnv: (id) => this.envs.isCoreEnv(id.toStringWithoutVersion()), extensionsOf: (id) => { - const fromCache = this.componentsExtensionsCache.get(id.toString()); - if (!fromCache?.extensions) return []; - return Array.from(fromCache.extensions).map((ext) => ({ + const fromScratch = scratch.extensions.get(id.toString()); + if (!fromScratch?.extensions) return []; + return Array.from(fromScratch.extensions).map((ext) => ({ stringId: ext.stringId, newExtensionId: ext.newExtensionId, })); }, - envIdOf: (id) => this.componentsExtensionsCache.get(id.toString())?.envId, + envIdOf: (id) => scratch.extensions.get(id.toString())?.envId, }; const { groups: rawGroups, extraExtensionIds } = buildLoadPlanGroups(planInput); @@ -329,12 +341,14 @@ export class WorkspaceComponentLoader { private async getAndLoadSlot( workspaceIds: ComponentID[], scopeIds: ComponentID[], - loadOpts: GetAndLoadSlotOpts + loadOpts: GetAndLoadSlotOpts, + scratch: LoadScratch ): Promise { const { workspaceComponents, scopeComponents, invalidComponents } = await this.getComponentsWithoutLoadExtensions( workspaceIds, scopeIds, - loadOpts + loadOpts, + scratch ); // If we are here it means we are on workspace, in that case we don't want to load @@ -424,35 +438,33 @@ export class WorkspaceComponentLoader { } } - private async populateScopeAndExtensionsCache(ids: ComponentID[], workspaceScopeIdsMap: WorkspaceScopeIdsMap) { + private async populateScopeAndExtensionsCache( + ids: ComponentID[], + workspaceScopeIdsMap: WorkspaceScopeIdsMap, + scratch: LoadScratch + ) { return mapSeries(ids, async (id) => { const idStr = id.toString(); - let componentFromScope; - if (!this.scopeComponentsCache.has(idStr)) { + let componentFromScope = scratch.scope.get(idStr); + if (!componentFromScope) { try { // Do not import automatically if it's missing, it will throw an error later componentFromScope = await this.workspace.scope.get(id, undefined, false); - if (componentFromScope) { - this.scopeComponentsCache.set(idStr, componentFromScope); - } - // This is fine here, as it will be handled later in the process + if (componentFromScope) scratch.scope.set(idStr, componentFromScope); } catch (err: any) { const wsAspectLoader = this.workspace.getWorkspaceAspectsLoader(); wsAspectLoader.throwWsJsoncAspectNotFoundError(err); this.logger.warn(`populateScopeAndExtensionsCache - failed loading component ${idStr} from scope`, err); } } - if (!this.componentsExtensionsCache.has(idStr) && workspaceScopeIdsMap.workspaceIds.has(idStr)) { - componentFromScope = componentFromScope || this.scopeComponentsCache.get(idStr); + if (!scratch.extensions.has(idStr) && workspaceScopeIdsMap.workspaceIds.has(idStr)) { const { extensions, errors, envId } = await this.workspace.componentExtensions( id, componentFromScope, undefined, - { - loadExtensions: false, - } + { loadExtensions: false } ); - this.componentsExtensionsCache.set(idStr, { extensions, errors, envId }); + scratch.extensions.set(idStr, { extensions, errors, envId }); } }); } @@ -492,7 +504,8 @@ export class WorkspaceComponentLoader { private async getComponentsWithoutLoadExtensions( workspaceIds: ComponentID[], scopeIds: ComponentID[], - loadOpts: GetAndLoadSlotOpts + loadOpts: GetAndLoadSlotOpts, + scratch: LoadScratch ) { const invalidComponents: InvalidComponent[] = []; const errors: { id: ComponentID; err: Error }[] = []; @@ -538,23 +551,25 @@ export class WorkspaceComponentLoader { }); const getWithCatch = (id, legacyComponent) => { - return this.get(id, legacyComponent, undefined, undefined, loadOptsWithDefaults).catch((err) => { - if (ConsumerComponent.isComponentInvalidByErrorType(err)) { - invalidComponents.push({ - id, - err, - }); - return undefined; - } - if (this.isComponentNotExistsError(err) || err instanceof ComponentNotFoundInPath) { - errors.push({ - id, - err, - }); - return undefined; + return this.get(id, legacyComponent, undefined, undefined, loadOptsWithDefaults, undefined, scratch).catch( + (err) => { + if (ConsumerComponent.isComponentInvalidByErrorType(err)) { + invalidComponents.push({ + id, + err, + }); + return undefined; + } + if (this.isComponentNotExistsError(err) || err instanceof ComponentNotFoundInPath) { + errors.push({ + id, + err, + }); + return undefined; + } + throw err; } - throw err; - }); + ); }; // await this.getConsumerComponent(id, loadOpts) @@ -636,7 +651,10 @@ export class WorkspaceComponentLoader { useCache = true, storeInCache = true, loadOpts?: ComponentLoadOptions, - getOpts: ComponentGetOneOptions = { resolveIdVersion: true } + getOpts: ComponentGetOneOptions = { resolveIdVersion: true }, + // Internal: per-operation scratch state. External callers should leave this + // undefined; loadOne will fall back to direct workspace lookups. + scratch?: LoadScratch ): Promise { const loadOptsWithDefaults: ComponentLoadOptions = Object.assign( { loadExtensions: true, executeLoadSlot: true }, @@ -655,7 +673,7 @@ export class WorkspaceComponentLoader { // in case of out-of-sync, the id may changed during the load process const updatedId = consumerComponent ? consumerComponent.id : id; - const component = await this.loadOne(updatedId, consumerComponent, loadOptsWithDefaults); + const component = await this.loadOne(updatedId, consumerComponent, loadOptsWithDefaults, scratch); if (storeInCache) { this.addMultipleEnvsIssueIfNeeded(component); // it's in storeInCache block, otherwise, it wasn't fully loaded this.saveInCache(component, loadOptsWithDefaults); @@ -694,36 +712,31 @@ export class WorkspaceComponentLoader { clearCache() { this.componentsCache.deleteAll(); - this.scopeComponentsCache.deleteAll(); - this.componentsExtensionsCache.deleteAll(); } clearComponentCache(id: ComponentID) { const idStr = id.toString(); - const cachesToClear = [this.componentsCache, this.scopeComponentsCache, this.componentsExtensionsCache]; - cachesToClear.forEach((cache) => { - for (const cacheKey of cache.keys()) { - if (cacheKey === idStr || cacheKey.startsWith(`${idStr}:`)) { - cache.delete(cacheKey); - } + for (const cacheKey of this.componentsCache.keys()) { + if (cacheKey === idStr || cacheKey.startsWith(`${idStr}:`)) { + this.componentsCache.delete(cacheKey); } - }); + } } - private async loadOne(id: ComponentID, consumerComponent?: ConsumerComponent, loadOpts?: ComponentLoadOptions) { + private async loadOne( + id: ComponentID, + consumerComponent?: ConsumerComponent, + loadOpts?: ComponentLoadOptions, + scratch?: LoadScratch + ) { const idStr = id.toString(); - const componentFromScope = this.scopeComponentsCache.has(idStr) - ? this.scopeComponentsCache.get(idStr) - : await this.workspace.scope.get(id); + const componentFromScope = scratch?.scope.get(idStr) ?? (await this.workspace.scope.get(id)); if (!consumerComponent) { if (!componentFromScope) throw new MissingBitMapComponent(id.toString()); return componentFromScope; } - const extErrorsFromCache = this.componentsExtensionsCache.has(idStr) - ? this.componentsExtensionsCache.get(idStr) - : undefined; const { extensions, errors } = - extErrorsFromCache || + scratch?.extensions.get(idStr) ?? (await this.workspace.componentExtensions(id, componentFromScope, undefined, { loadExtensions: loadOpts?.loadExtensions, })); From dfa994045471ec35c904d0d4c91b3b67c73d25e2 Mon Sep 17 00:00:00 2001 From: David First Date: Thu, 7 May 2026 20:17:11 -0400 Subject: [PATCH 16/17] docs(workspace): update D-002 + file overview to reflect cache simplification --- .../component-loading-rewrite/DECISIONS.md | 19 ++++++++++++------- .../workspace-component-loader.ts | 18 +++++++++++------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/docs/rfcs/component-loading-rewrite/DECISIONS.md b/docs/rfcs/component-loading-rewrite/DECISIONS.md index 62821ed5afed..1a3302b2208b 100644 --- a/docs/rfcs/component-loading-rewrite/DECISIONS.md +++ b/docs/rfcs/component-loading-rewrite/DECISIONS.md @@ -24,7 +24,7 @@ After tracing the actual behavior, the rules are coherent — they're just undocumented. Recording them here so future changes can preserve them deliberately rather than re-discover them. -### The four caches +### The four caches _(originally — see "Implications" below for what's left)_ | Cache | Stores | Key | Populated by | | ------------------------------ | -------------------------------- | -------------------------- | ------------------------------------- | @@ -107,12 +107,17 @@ post-load state, not the pre-load options. - Don't try to remove the fully-loaded fallback. Without it, `getMany` results never re-hit on subsequent calls with different default flags, and the cache becomes useless for the most common code paths. -- Do consider whether `componentLoadedSelfAsAspects` could be folded into a - metadata field on the Component itself, so it's not a separate cache to - invalidate. Not urgent. -- `scopeComponentsCache` and `componentsExtensionsCache` are scratch state - for one load operation. In the rewrite they should live on the load-plan - context, not on the loader instance. + +### Done after writing this entry + +- ✅ `componentLoadedSelfAsAspects` removed. `AspectLoaderMain.isAspectLoaded` + already returns true for both successfully-loaded aspects (via `harmony.get`) + and previously-failed ones (via the `failedAspects` registry), so the local + memoization flag was redundant. +- ✅ `scopeComponentsCache` and `componentsExtensionsCache` moved off the + loader instance. They're now per-operation scratch state (`LoadScratch`) + threaded through the call chain. The loader has one cache field + (`componentsCache`, the actual output cache) instead of four. ### Evidence index diff --git a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts index 2a5e626548b3..00f0348b53e3 100644 --- a/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts +++ b/scopes/workspace/workspace/workspace-component/workspace-component-loader.ts @@ -12,22 +12,26 @@ * Internal call shape (getMany): * * getMany - * ├─ getFromCache (per id) [components cache] - * └─ getAndLoadSlotOrdered (for cache misses) + * ├─ getFromCache (per id) [componentsCache] + * └─ getAndLoadSlotOrdered (for cache misses) [creates a LoadScratch] * ├─ groupAndUpdateIds → classifyIds [discovery.ts, pure] - * ├─ buildLoadGroups - * │ ├─ populateScopeAndExtensionsCache [warms scratch caches] + * ├─ buildLoadGroups (uses scratch) + * │ ├─ populateScopeAndExtensionsCache [warms scratch] * │ └─ buildLoadPlanGroups [load-plan.ts, pure] * │ ├─ groupEnvsByDepLayer [env-dag-sort.ts, pure] * │ └─ groupExtsByDepLayer [dep-dag-sort.ts, pure] - * └─ getAndLoadSlot (per group, in order) + * └─ getAndLoadSlot (per group, in order — passes scratch through) * ├─ getComponentsWithoutLoadExtensions - * │ └─ consumer.loadComponents → loadOne + * │ └─ consumer.loadComponents → get → loadOne * ├─ loadComponentsExtensions [optional] * ├─ executeLoadSlot [onComponentLoad subscribers] * └─ loadCompsAsAspects [register as Harmony aspects] * - * Caching: see D-002 in docs/rfcs/component-loading-rewrite/DECISIONS.md. + * State: `componentsCache` is the only instance-lifetime cache (the actual + * output cache). Per-operation scratch state lives in a `LoadScratch` value + * created at the top of getAndLoadSlotOrdered and threaded down to loadOne. + * + * Caching invariants: see D-002 in docs/rfcs/component-loading-rewrite/DECISIONS.md. * Recursion / load-order: see D-001 in the same file. */ From 23d34040974563b2998cd4ce56da23c9a2378ccc Mon Sep 17 00:00:00 2001 From: David First Date: Fri, 8 May 2026 10:06:20 -0400 Subject: [PATCH 17/17] fix(workspace): restore core-aspect-env special-case (was a real regression) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier "drop core-aspect-env special case" claim was wrong. The recursive env-DAG sort handles load *ordering* correctly, but the special case was also marking these envs with `core: true` — the flag that `getAndLoadSlot` checks before triggering `loadCompsAsAspects`. Without it, `bit status` on bit7 prints "environment with ID: teambit.harmony/envs/core-aspect-env was not loaded (run 'bit install')" because the env aspect wasn't registered with Harmony when components configured to use it were loaded. Restored in load-plan.ts (Step 7b) and pinned by a unit test. D-001 updated with a correction noting the misprediction. The diff harness didn't catch this because V1-vs-V1 mode compares the modified V1 against itself — they're equally wrong, so they match. Real coverage gap. --- .../component-loading-rewrite/DECISIONS.md | 18 ++++++++++++--- .../workspace-component/load-plan.spec.ts | 18 +++++++++++++++ .../workspace-component/load-plan.ts | 22 +++++++++++++++++++ 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/docs/rfcs/component-loading-rewrite/DECISIONS.md b/docs/rfcs/component-loading-rewrite/DECISIONS.md index 1a3302b2208b..69a090d73907 100644 --- a/docs/rfcs/component-loading-rewrite/DECISIONS.md +++ b/docs/rfcs/component-loading-rewrite/DECISIONS.md @@ -224,9 +224,21 @@ Treat env loading as a topological-ordering problem, not as something to 2. **Make env-DAG resolution properly recursive.** Replace `regroupEnvsIdsFromTheList`'s one-level grouping with a real topological - sort over the env-of-env DAG. The hardcoded `core-aspect-env` workaround - at `loader.ts:311-323` should fall out of this — if the sort is correct, - no special-case is needed. + sort over the env-of-env DAG. + + ~~The hardcoded `core-aspect-env` workaround at `loader.ts:311-323` should + fall out of this — if the sort is correct, no special-case is needed.~~ + + **Correction (2026-05-08):** the recursive sort gives correct _ordering_ + but the special case was doing more than ordering — it was emitting + `core-aspect-env` / `core-aspect-env-jest` in a group with `core: true`, + which is the flag `getAndLoadSlot` checks before triggering + `loadCompsAsAspects`. Without that flag, components configured to use + these envs warn "env was not loaded" because the env aspect wasn't + registered with Harmony at the right time. The special case is preserved + in `load-plan.ts` (Step 7b), and pinned by a unit test in + `load-plan.spec.ts`. Caught by inspection, not by the harness — V1-vs-V1 + comparison can't catch a regression where the V1 itself gets worse. 3. **Avoid `getEnvComponentByEnvId` during the inline enrichment of regular components.** What enrichment actually needs from the env is the env's diff --git a/scopes/workspace/workspace/workspace-component/load-plan.spec.ts b/scopes/workspace/workspace/workspace-component/load-plan.spec.ts index e7be89c0a599..2cecbc7e9d46 100644 --- a/scopes/workspace/workspace/workspace-component/load-plan.spec.ts +++ b/scopes/workspace/workspace/workspace-component/load-plan.spec.ts @@ -109,4 +109,22 @@ describe('buildLoadPlanGroups', () => { const result = buildLoadPlanGroups(inputFor([{ id: id('scope/regular@1.0.0') }])); for (const g of result.groups) expect(g.ids.length).to.be.greaterThan(0); }); + + it('emits core-aspect-env / core-aspect-env-jest as a CORE group at the front', () => { + // Removing this special case caused a regression: components configured to use + // core-aspect-env warned "env was not loaded" because the env wasn't loaded + // with the core flag, so loadCompsAsAspects was skipped. The recursive sort + // gives correct ORDER but doesn't promote the env's group to core. + const coreAspectEnv = id('teambit.harmony/envs/core-aspect-env@0.1.4'); + const userComp: MockComp = { + id: id('teambit.envs/env@1.0.0'), + envId: coreAspectEnv.toStringWithoutVersion(), + extensions: [{ stringId: coreAspectEnv.toString(), newExtensionId: coreAspectEnv }], + }; + const result = buildLoadPlanGroups(inputFor([userComp])); + const firstGroup = result.groups[0]; + expect(firstGroup.core, 'first group should be core: true').to.equal(true); + expect(firstGroup.envs, 'first group should be envs: true').to.equal(true); + expect(firstGroup.ids.map((c) => c.toStringWithoutVersion())).to.include('teambit.harmony/envs/core-aspect-env'); + }); }); diff --git a/scopes/workspace/workspace/workspace-component/load-plan.ts b/scopes/workspace/workspace/workspace-component/load-plan.ts index 3b38b4dc4054..e18cde7db4a6 100644 --- a/scopes/workspace/workspace/workspace-component/load-plan.ts +++ b/scopes/workspace/workspace/workspace-component/load-plan.ts @@ -127,8 +127,30 @@ export function buildLoadPlanGroups(input: LoadPlanInput): BuildLoadPlanResult { return out; }); + // Step 7b: pull `core-aspect-env` and `core-aspect-env-jest` out as a *core* + // group and prepend it. Without this they get loaded later with `core: false`, + // and `getAndLoadSlot` only triggers `loadCompsAsAspects` on a core+aspects + // group (or a seeders-as-aspects group). Components configured to use these + // envs then warn "env was not loaded" because the env aspect wasn't + // registered with Harmony at the right time. The recursive env-DAG sort + // gives correct *ordering*; this special case is about the *core flag*. + // (D-001 originally claimed this special case was redundant; it isn't.) + const CORE_ASPECT_ENV_IDS = new Set([ + 'teambit.harmony/envs/core-aspect-env', + 'teambit.harmony/envs/core-aspect-env-jest', + ]); + const coreAspectEnvIds: ComponentID[] = []; + for (const layer of layeredEnvs) { + for (const envId of layer) { + if (CORE_ASPECT_ENV_IDS.has(envId.toStringWithoutVersion())) coreAspectEnvIds.push(envId); + } + } + // Step 8: assemble groups in canonical order. const rawGroups: RawLoadGroup[] = [ + ...(coreAspectEnvIds.length > 0 + ? [{ ids: coreAspectEnvIds, core: true, aspects: true, seeders: true, envs: true }] + : []), { ids: coreEnvs, core: true, aspects: true, seeders: true, envs: true }, ...layeredEnvs.map((ids) => ({ ids, core: false, aspects: true, seeders: true, envs: true })), { ids: extraExtensionIds, core: false, aspects: true, seeders: false, envs: false },