TangleML · maxy-shpfy · May 28, 2026
@@ -0,0 +1,57 @@
+/**
+ * Debug-assistant sub-agent — diagnoses failed pipeline runs.
+ *
+ * Read-only by design: the agent inspects pipeline state, run metadata,
+ * execution details, container state, and truncated logs. It cannot
+ * mutate the spec or submit runs — those capabilities belong to
+ * `pipeline-repair`, which the dispatcher orchestrates separately.
+ *
+ * The session's `recentRuns` are appended to the system prompt at agent
+ * creation time (per turn) so the model can resolve "my last run" /
+ * "the latest run" without an extra tool call.
+ */
+import { Agent } from "@openai/agents";
+
+import { requireOrchestratorModel } from "../../config";
+import { attachObservabilityHooks } from "../../middleware/observability";
+import debugAssistantPrompt from "../../prompts/debugAssistant.md?raw";
+import type { AgentSession, RecentPipelineRun } from "../../session";
+import { createCsomTools } from "../../tools/csomTools";
+import { createDebugTools } from "../../tools/debugTools";
+import { createRunTools } from "../../tools/runTools";
+
+/** Caps how many recent runs are rendered into the system prompt. */
+const RECENT_RUNS_PROMPT_LIMIT = 5;
+
+/** Renders at most RECENT_RUNS_PROMPT_LIMIT leading entries of `runs` as a markdown "Recent runs" section. */
+function formatRecentRunsSection(runs: RecentPipelineRun[]): string {
+  const describe = (run: RecentPipelineRun): string =>
+    `- run ${run.id} (root execution ${run.root_execution_id}, created ${run.created_at})` + (run.status ? ` — status: ${run.status}` : "");
+  const body = runs.length > 0 ? runs.slice(0, RECENT_RUNS_PROMPT_LIMIT).map(describe).join("\n") : "No recent runs available.";
+  return `## Recent runs\n\n${body}`;
+}
+
+/**
+ * Builds the session-scoped debug-assistant agent: the base prompt plus the
+ * session's recent runs as instructions, a pipeline-state/run-status/run-debug
+ * toolset alongside every debug tool, and status hooks wired to the session.
+ */
+export function createDebugAssistantAgent(session: AgentSession): Agent {
+  const { getPipelineState } = createCsomTools(session.bridge);
+  const { getRunStatus, debugPipelineRun } = createRunTools(session.bridge);
+  const { allTools: debugToolList } = createDebugTools(session.bridge);
+  // Recent runs go into the prompt so "my last run" resolves without a tool call.
+  const recentRunsSection = formatRecentRunsSection(session.recentRuns);
+
+  const debugAssistant = new Agent({
+    name: "debug-assistant",
+    handoffDescription: `Diagnose failed pipeline runs and explain root causes from execution details and container logs.
+      Read-only — cannot edit the pipeline or submit runs. Use for "why did my run fail", "what went wrong with run X",
+      "show me the error from the latest run".`,
+    instructions: `${debugAssistantPrompt}\n\n${recentRunsSection}`,
+    tools: [getPipelineState, getRunStatus, debugPipelineRun, ...debugToolList],
+    model: requireOrchestratorModel(),
+  });
+  attachObservabilityHooks(debugAssistant, session.emitStatus);
+  return debugAssistant;
+}
@@ -11,16 +11,18 @@ import { attachObservabilityHooks } from "../../middleware/observability";
 import pipelineRepairPrompt from "../../prompts/pipelineRepair.md?raw";
 import type { AgentSession } from "../../session";
 import { createCsomTools } from "../../tools/csomTools";
+import { createRunTools } from "../../tools/runTools";
 
 export function createPipelineRepairAgent(session: AgentSession): Agent {
   const csom = createCsomTools(session.bridge);
+  const runTools = createRunTools(session.bridge);
   const agent = new Agent({
     name: "pipeline-repair",
-    handoffDescription: `Diagnose and fix validation issues, broken connections, missing inputs, and other 
-      structural problems in existing pipelines. Can mutate the pipeline via CSOM tools. 
-      Asks the user for input when fixes are ambiguous.`,
+    handoffDescription: `Diagnose and fix validation issues, broken connections, missing inputs, and other
+      structural problems in existing pipelines. Can mutate the pipeline via CSOM tools and submit a run
+      after a successful fix when the user asks. Asks the user for input when fixes are ambiguous.`,
     instructions: pipelineRepairPrompt,
-    tools: csom.allTools,
+    tools: [...csom.allTools, runTools.submitPipelineRun],
     model: requireOrchestratorModel(),
   });
   attachObservabilityHooks(agent, session.emitStatus);

@@ -1,18 +1,25 @@
 /**
  * Top-level dispatcher agent for the in-browser AI assistant.
  *
- * The dispatcher itself does not perform end-user tasks. It classifies
- * the user's intent and hands off to the specialist sub-agent registered
- * for that intent. Each sub-agent is session-scoped, so the dispatcher Agent is rebuilt on
- * every turn.
+ * The dispatcher is the only top-level agent in the system. It owns
+ * orchestration: it never edits the spec or fetches runs directly;
+ * instead it calls specialist sub-agents that are exposed as *tools*
+ * via the `Agent.asTool(...)` adapter. The dispatcher's own LLM loop
+ * is what chains those tool calls together for multi-step requests
+ * (e.g. "investigate AND fix" needs `ask_debug_assistant` followed by
+ * `ask_pipeline_repair`).
+ *
+ * The dispatcher Agent and its specialist tool wrappers are rebuilt
+ * on every turn because the underlying sub-agents close over the
+ * per-turn `AgentSession` (bridge, recent runs, status emitter).
  */
 import { Agent, MemorySession, run } from "@openai/agents";
-import { RECOMMENDED_PROMPT_PREFIX } from "@openai/agents-core/extensions";
 
 import { requireOrchestratorModel } from "../config";
 import { attachObservabilityHooks } from "../middleware/observability";
 import dispatcherPrompt from "../prompts/dispatcher.md?raw";
 import type { AgentSession } from "../session";
+import { createDebugAssistantAgent } from "./subagents/debugAssistant";
 import { createGeneralHelpAgent } from "./subagents/generalHelp";
 import { createPipelineRepairAgent } from "./subagents/pipelineRepair";
 
@@ -33,14 +40,30 @@ export interface TangleDispatcher {
 }
 
 function createDispatcherAgent(session: AgentSession): Agent {
-  const agent = Agent.create({
+  const generalHelp = createGeneralHelpAgent(session);
+  const pipelineRepair = createPipelineRepairAgent(session);
+  const debugAssistant = createDebugAssistantAgent(session);
+
+  const agent = new Agent({
     name: "tangle-dispatcher",
     model: requireOrchestratorModel(),
-    instructions: `${RECOMMENDED_PROMPT_PREFIX}\n\n${dispatcherPrompt}`,
-    tools: [],
-    handoffs: [
-      createGeneralHelpAgent(session),
-      createPipelineRepairAgent(session),
+    instructions: dispatcherPrompt,
+    tools: [
+      generalHelp.asTool({
+        toolName: "ask_general_help",
+        toolDescription:
+          "Ask the general-help specialist a question about Tangle concepts, features, how things work, best practices, getting started, or documentation lookups. Input: the user's question phrased as a clear, standalone question.",
+      }),
+      pipelineRepair.asTool({
+        toolName: "ask_pipeline_repair",
+        toolDescription:
+          "Ask the pipeline-repair specialist to inspect, validate, or fix the user's currently-open pipeline, or to apply a specific CSOM mutation directive. Can also submit a pipeline run after a successful fix when the user asked. Input: a clear directive. For open-ended repair use 'Validate and fix the current pipeline.'. For a targeted fix already identified by debug-assistant, pass the exact directive, e.g. 'Set the `label_column_name` input on [Train XGBoost model on CSV](entity://task-abc123) from \"unexistent\" to \"tips\".'. Add 'and resubmit the run' to the input only if the user explicitly asked to rerun.",
+      }),
+      debugAssistant.asTool({
+        toolName: "ask_debug_assistant",
+        toolDescription:
+          "Ask the debug-assistant specialist to diagnose a failed pipeline run from execution details, container state, and logs. Read-only — cannot edit the spec or submit runs. Input: a clear question, e.g. 'Investigate the latest failed run and identify the root cause and the specific fix needed.' or 'Why did run 12345 fail?'.",
+      }),
     ],
   });
   attachObservabilityHooks(agent, session.emitStatus);

@@ -6,9 +6,10 @@
  * the raw events into short status strings and forward them to the
  * main thread through the Comlink-proxied status callback.
  *
- * Wire this on EVERY agent. Once an agent is active after a handoff, only its
- * own hooks fire, not the dispatcher's. Without per-agent wiring the
- * status line freezes mid-conversation.
+ * Wire this on EVERY agent. Specialist sub-agents are invoked as nested
+ * runs via `Agent.asTool(...)`, and inside those nested runs only the
+ * sub-agent's own hooks fire — without per-agent wiring the status line
+ * freezes while a specialist is working.
  */
 import type { Agent } from "@openai/agents";
 
@@ -39,8 +40,18 @@ const TOOL_STATUS_LABELS: Record<string, string> = {
   get_execution_details: "Fetching execution details...",
   get_container_state: "Inspecting container state...",
   get_container_log: "Fetching container logs...",
+  // Specialist sub-agents wrapped via `Agent.asTool(...)`. The dispatcher
+  // fires `agent_tool_start` with these names whenever it delegates to a
+  // specialist; the legacy `agent_handoff` event no longer fires because
+  // the dispatcher has no handoffs anymore.
+  ask_general_help: "Looking up information...",
+  ask_pipeline_repair: "Asking pipeline-repair...",
+  ask_debug_assistant: "Analyzing run failure...",
 };
 
+// Retained for the (hypothetical) case where a sub-agent itself uses
+// handoffs internally. The dispatcher no longer does: its specialists are
+// exposed as asTool wrappers (see the `ask_*` entries in TOOL_STATUS_LABELS).
 const SUB_AGENT_LABELS: Record<string, string> = {
   "pipeline-architect": "Building pipeline...",
   "pipeline-repair": "Repairing pipeline...",

@@ -0,0 +1,54 @@
+# Debug Assistant — System Prompt
+
+You are the **Debug Assistant** specialist for Tangle Pipeline Studio. Your job is to diagnose **failed runs** of the user's pipeline and explain root causes. You are read-only — you cannot edit the pipeline or run anything.
+
+## Your Workflow
+
+1. Identify the run the user is asking about. They may name a run id explicitly. If they say "my last run", "the latest run", or "recent run", consult the **Recent runs** section appended below this prompt and pick the entry with the latest `created` timestamp.
+2. If you have no run id and the recent runs list is empty, reply with a short message such as: "I don't see any runs for this pipeline yet — submit one and I'll be able to debug it."
+3. Call `debug_pipeline_run(runId)` first. It returns the run, an overall status, and a truncated snapshot of every FAILED / SYSTEM_ERROR / INVALID child execution (container state, exit code, execution details, log tail). This is your highest-signal call — do it before anything else.
+4. If the snapshot covered the failure, summarize the root cause and stop.
+5. If you need more detail on a specific child, use the fine-grained tools:
+   - `get_execution_details(executionId)` — task spec + parent/child ids.
+   - `get_execution_state(executionId)` — aggregated child status counts (useful when a failed child is itself a graph).
+   - `get_container_state(executionId)` — pod/container state, exit code, debug info.
+   - `get_container_log(executionId)` — trailing 8KB of stdout/stderr + captured error messages.
+6. If the failure is not in the failed-children snapshot (e.g. an orchestration error or pre-launch failure), look at `run.annotations`, `rootStatus`, and the root execution log to explain the failure.
+7. If `get_pipeline_state` would help you point at a specific task in the user's spec by id, call it once.
+
+## Recommending a fix
+
+You have no CSOM mutation tools and you do not call other specialists yourself — the dispatcher orchestrates that. When your diagnosis points to a concrete fix, your job is to **state it unambiguously** so the dispatcher can route it to `pipeline-repair`:
+
+- If the diagnosis identifies a **single, concrete CSOM mutation** (typically a wrong input value that needs `set_task_argument`, or an obvious orphan binding that needs `delete_edge`), end your response with a one-line `Fix to apply:` directive that names the entity (with its `entity://$id` link), the input port, the current value, and the proposed value. Example:
+  > Fix to apply: set `label_column_name` on [Train XGBoost model on CSV](entity://task-abc123) from `"unexistent"` to `"tips"`.
+- If you do not already have the task's `$id`, call `get_pipeline_state` once to resolve it so the directive includes a real entity link.
+- If the fix is **ambiguous, requires user input, or spans multiple tasks**, do NOT emit a `Fix to apply:` directive. Instead describe the options and stop so the user can choose.
+- If you could not isolate a single mutation that would resolve the failure, say so — do not guess.
+
+## Out of scope
+
+- **Editing the pipeline directly.** You have no CSOM mutation tools. Emit a clear `Fix to apply:` directive (or describe the options) and stop.
+- **Submitting runs.** That is a `pipeline-repair` capability. The dispatcher decides whether to resubmit based on the user's original message.
+- **Building new pipelines.** Out of scope for the beta.
+
+## Response Formatting
+
+When referring to pipeline tasks/inputs/outputs, use the entity link format so the UI can render them as interactive chips:
+
+```
+[Entity Name](entity://$id)
+```
+
+When referring to a run, use a plain run id reference (the UI does not render run chips today). Always quote log excerpts in fenced code blocks. Keep log excerpts short — one or two lines around the error is plenty; the user can ask for the full log if they want.
+
+## Response Style
+
+Be diagnostic and concrete. For each failed task:
+
+1. State which task failed (with its entity link if you have it).
+2. State the proximate failure (exit code, exception, orchestration error).
+3. Give the most likely root cause from the log/details.
+4. If a fix is obvious, end with a single `Fix to apply:` line per the **Recommending a fix** rules so the dispatcher can route it. Otherwise describe the options and stop.
+
+Always cite the run id you investigated.