Skip to content

Commit 8d40fe5

Browse files
committed
feat: persist context and harness governance artifacts
1 parent 9a42faf commit 8d40fe5

7 files changed

Lines changed: 408 additions & 8 deletions

File tree

apps/orchestrator/src/cortexpilot_orch/scheduler/completion_governance.py

Lines changed: 183 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,163 @@ def _collect_policy_action(
105105
return ""
106106

107107

108+
def _extract_thread_id(
109+
*,
110+
contract: dict[str, Any],
111+
task_result: dict[str, Any] | None,
112+
run_dir: Path,
113+
) -> str:
114+
if isinstance(task_result, dict):
115+
evidence_refs = task_result.get("evidence_refs")
116+
if isinstance(evidence_refs, dict):
117+
thread_id = str(evidence_refs.get("thread_id") or evidence_refs.get("codex_thread_id") or "").strip()
118+
if thread_id:
119+
return thread_id
120+
assigned_agent = contract.get("assigned_agent")
121+
if isinstance(assigned_agent, dict):
122+
thread_id = str(assigned_agent.get("codex_thread_id") or "").strip()
123+
if thread_id:
124+
return thread_id
125+
return run_dir.name
126+
127+
128+
def _detect_context_pack_trigger(
129+
*,
130+
failure_reason: str,
131+
reply_auditor: dict[str, Any],
132+
) -> str:
133+
normalized_reason = failure_reason.lower()
134+
trigger_pairs = [
135+
("context_pressure", ["context pressure", "context limit", "token limit"]),
136+
("contamination", ["contamination", "context contamination", "poisoned context"]),
137+
("role_switch", ["role switch", "handoff to another role"]),
138+
("phase_switch", ["phase switch", "stage switch", "next phase"]),
139+
("repetition", ["repetition", "repeat", "looping reply"]),
140+
("distortion", ["distortion", "garbled", "misread"]),
141+
]
142+
for trigger, phrases in trigger_pairs:
143+
if any(phrase in normalized_reason for phrase in phrases):
144+
return trigger
145+
signals = reply_auditor.get("signals")
146+
if isinstance(signals, list):
147+
normalized_signals = [str(item).strip().lower() for item in signals if str(item).strip()]
148+
if any("repetition" in item for item in normalized_signals):
149+
return "repetition"
150+
if any("contamination" in item for item in normalized_signals):
151+
return "contamination"
152+
return ""
153+
154+
155+
def _build_context_pack_artifact(
156+
*,
157+
contract: dict[str, Any],
158+
run_dir: Path,
159+
failure_reason: str,
160+
continuation_summary: str,
161+
reply_auditor: dict[str, Any],
162+
) -> dict[str, Any] | None:
163+
trigger_reason = _detect_context_pack_trigger(
164+
failure_reason=failure_reason,
165+
reply_auditor=reply_auditor,
166+
)
167+
if not trigger_reason:
168+
return None
169+
assigned_agent = contract.get("assigned_agent") if isinstance(contract.get("assigned_agent"), dict) else {}
170+
objective = str(contract.get("objective") or "").strip() or "Continue the current scope safely."
171+
source_role = str(assigned_agent.get("role") or "WORKER").strip() or "WORKER"
172+
thread_id = _extract_thread_id(contract=contract, task_result=None, run_dir=run_dir)
173+
return {
174+
"version": "v1",
175+
"pack_id": f"ctx-pack-{run_dir.name}",
176+
"role_scope": "L1",
177+
"source_session_id": thread_id,
178+
"source_role": source_role,
179+
"trigger_reason": trigger_reason,
180+
"global_state_summary": (
181+
f"The current run for '{objective}' hit a {trigger_reason} fallback condition and needs an explicit handoff."
182+
),
183+
"actor_handoff_summary": continuation_summary,
184+
"required_reads": [
185+
"contract.json",
186+
"reports/task_result.json",
187+
"reports/completion_governance_report.json",
188+
],
189+
"optional_reads": [
190+
"reports/review_report.json",
191+
"reports/test_report.json",
192+
"events.jsonl",
193+
],
194+
"conversation_exports": ["events.jsonl"],
195+
"artifact_refs": [
196+
"reports/task_result.json",
197+
"reports/completion_governance_report.json",
198+
],
199+
}
200+
201+
202+
def _derive_harness_request_artifact(
203+
*,
204+
contract: dict[str, Any],
205+
task_result: dict[str, Any] | None,
206+
run_dir: Path,
207+
) -> tuple[dict[str, Any] | None, str]:
208+
if not isinstance(task_result, dict):
209+
return None, "not_requested"
210+
gates = task_result.get("gates")
211+
if not isinstance(gates, dict):
212+
return None, "not_requested"
213+
policy_gate = gates.get("policy_gate")
214+
if not isinstance(policy_gate, dict):
215+
return None, "not_requested"
216+
violations = policy_gate.get("violations")
217+
if not isinstance(violations, list) or not violations:
218+
return None, "not_requested"
219+
normalized_violations = [str(item).strip() for item in violations if str(item).strip()]
220+
if not normalized_violations:
221+
return None, "not_requested"
222+
223+
project_level_signals = {"network_gate", "mcp_gate", "human_approval_required"}
224+
scope = "project-local" if any(item in project_level_signals for item in normalized_violations) else "session-local"
225+
approval_state = "approval_required" if scope == "project-local" else "auto_approved"
226+
assigned_agent = contract.get("assigned_agent") if isinstance(contract.get("assigned_agent"), dict) else {}
227+
runtime_options = contract.get("runtime_options") if isinstance(contract.get("runtime_options"), dict) else {}
228+
229+
requested_capabilities = {
230+
"skills": ["continuation-hardening"],
231+
"mcp_servers": ["runtime-governance"] if "mcp_gate" in normalized_violations else [],
232+
"permission_changes": [],
233+
"runtime_bindings": [str(runtime_options.get("provider") or runtime_options.get("runner") or "codex")],
234+
}
235+
if "network_gate" in normalized_violations:
236+
requested_capabilities["permission_changes"].append("network.allow")
237+
if "tool_gate" in normalized_violations:
238+
requested_capabilities["permission_changes"].append("tool.allow")
239+
if "sampling_gate" in normalized_violations:
240+
requested_capabilities["permission_changes"].append("sampling.allow")
241+
if "human_approval_required" in normalized_violations:
242+
requested_capabilities["permission_changes"].append("approval.resume")
243+
244+
request = {
245+
"version": "v1",
246+
"request_id": f"harness-{run_dir.name}",
247+
"scope": scope,
248+
"requested_by": {
249+
"role": str(assigned_agent.get("role") or "WORKER").strip() or "WORKER",
250+
"agent_id": str(assigned_agent.get("agent_id") or "agent-1").strip() or "agent-1",
251+
},
252+
"reason": (
253+
"Runtime completion governance detected policy-gate blockers and generated a harness evolution request "
254+
f"for {', '.join(normalized_violations)}."
255+
),
256+
"requested_capabilities": requested_capabilities,
257+
"risk_level": "medium" if scope == "project-local" else "low",
258+
"approval_required": scope != "session-local",
259+
"rollback_plan": "Remove the temporary capability request and restore the current runtime/tool permission posture.",
260+
"validation_plan": "Rerun repo hygiene, targeted runtime tests, and the affected operator read-back after apply.",
261+
}
262+
return request, approval_state
263+
264+
108265
def _build_dod_checker(
109266
*,
110267
required_checks: list[str],
@@ -252,6 +409,19 @@ def evaluate_completion_governance(
252409
overall_verdict = "manual_triage"
253410
continuation_summary = "Completion governance could not select a safe automatic continuation path."
254411

412+
context_pack_artifact = _build_context_pack_artifact(
413+
contract=contract,
414+
run_dir=run_dir,
415+
failure_reason=failure_reason,
416+
continuation_summary=continuation_summary,
417+
reply_auditor=reply_auditor,
418+
)
419+
harness_request_artifact, harness_policy_state = _derive_harness_request_artifact(
420+
contract=contract,
421+
task_result=task_result,
422+
run_dir=run_dir,
423+
)
424+
255425
report = {
256426
"report_type": "completion_governance_report",
257427
"generated_at": generated_at,
@@ -269,12 +439,20 @@ def evaluate_completion_governance(
269439
"summary": continuation_summary,
270440
},
271441
"context_pack": {
272-
"status": "not_wired",
273-
"summary": "Context Pack remains fallback-only, but no runtime producer/consumer is wired into finalize_run yet.",
442+
"status": "generated" if context_pack_artifact else "not_requested",
443+
"summary": (
444+
f"Generated {context_pack_artifact['pack_id']} for fallback handoff."
445+
if context_pack_artifact
446+
else "No fallback Context Pack was requested for this run."
447+
),
274448
},
275449
"harness_request": {
276-
"status": "not_wired",
277-
"summary": "Harness Request has a schema home, but no request/apply lifecycle is wired into this run finalizer yet.",
450+
"status": harness_policy_state,
451+
"summary": (
452+
f"Generated {harness_request_artifact['request_id']} with {harness_policy_state} policy verdict."
453+
if harness_request_artifact
454+
else "No harness evolution request was needed for this run."
455+
),
278456
},
279457
}
280-
return report, updated_unblock_tasks
458+
return report, updated_unblock_tasks, context_pack_artifact, harness_request_artifact

apps/orchestrator/src/cortexpilot_orch/scheduler/scheduler_bridge_finalize.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,12 @@ def finalize_run(
277277
if not final_task_result.get("summary") and failure_reason:
278278
final_task_result["summary"] = failure_reason
279279

280-
completion_governance_report, updated_unblock_tasks = completion_governance.evaluate_completion_governance(
280+
(
281+
completion_governance_report,
282+
updated_unblock_tasks,
283+
context_pack_artifact,
284+
harness_request_artifact,
285+
) = completion_governance.evaluate_completion_governance(
281286
contract=contract,
282287
run_dir=run_dir,
283288
task_result=final_task_result if isinstance(final_task_result, dict) else task_result,
@@ -320,6 +325,43 @@ def finalize_run(
320325
"meta": {"unblock_task_id": selected_unblock_task_id},
321326
},
322327
)
328+
if context_pack_artifact is not None:
329+
store.write_artifact(
330+
run_id,
331+
"context_pack.json",
332+
json.dumps(context_pack_artifact, ensure_ascii=False, indent=2),
333+
)
334+
store.append_event(
335+
run_id,
336+
{
337+
"level": "INFO",
338+
"event": "CONTEXT_PACK_GENERATED",
339+
"run_id": run_id,
340+
"meta": {
341+
"pack_id": context_pack_artifact.get("pack_id"),
342+
"trigger_reason": context_pack_artifact.get("trigger_reason"),
343+
},
344+
},
345+
)
346+
if harness_request_artifact is not None:
347+
store.write_artifact(
348+
run_id,
349+
"harness_request.json",
350+
json.dumps(harness_request_artifact, ensure_ascii=False, indent=2),
351+
)
352+
store.append_event(
353+
run_id,
354+
{
355+
"level": "INFO",
356+
"event": "HARNESS_REQUEST_CREATED",
357+
"run_id": run_id,
358+
"meta": {
359+
"request_id": harness_request_artifact.get("request_id"),
360+
"scope": harness_request_artifact.get("scope"),
361+
"approval_required": harness_request_artifact.get("approval_required"),
362+
},
363+
},
364+
)
323365
if isinstance(final_task_result, dict):
324366
continuation_decision = completion_governance_report.get("continuation_decision", {})
325367
if isinstance(continuation_decision, dict):

apps/orchestrator/tests/test_completion_governance_runtime.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def test_evaluate_completion_governance_marks_complete_when_required_checks_pass
4141
test_report = {"status": "PASS"}
4242
review_report = {"verdict": "PASS"}
4343

44-
report, updated_unblock_tasks = evaluate_completion_governance(
44+
report, updated_unblock_tasks, context_pack_artifact, harness_request_artifact = evaluate_completion_governance(
4545
contract={"acceptance_tests": [{"cmd": ["pytest", "-q"]}]},
4646
run_dir=run_dir,
4747
task_result=task_result,
@@ -57,6 +57,8 @@ def test_evaluate_completion_governance_marks_complete_when_required_checks_pass
5757
assert report["reply_auditor"]["status"] == "accepted"
5858
assert report["continuation_decision"]["selected_action"] == "none"
5959
assert updated_unblock_tasks is None
60+
assert context_pack_artifact is None
61+
assert harness_request_artifact is None
6062

6163

6264
def test_evaluate_completion_governance_queues_unblock_task_for_blocked_run(tmp_path: Path) -> None:
@@ -108,7 +110,7 @@ def test_evaluate_completion_governance_queues_unblock_task_for_blocked_run(tmp_
108110
test_report = {"status": "FAIL"}
109111
review_report = {"verdict": "PASS"}
110112

111-
report, updated_unblock_tasks = evaluate_completion_governance(
113+
report, updated_unblock_tasks, context_pack_artifact, harness_request_artifact = evaluate_completion_governance(
112114
contract={},
113115
run_dir=run_dir,
114116
task_result=task_result,
@@ -125,3 +127,54 @@ def test_evaluate_completion_governance_queues_unblock_task_for_blocked_run(tmp_
125127
assert report["continuation_decision"]["unblock_task_id"] == "unblock-worker-1"
126128
assert updated_unblock_tasks is not None
127129
assert updated_unblock_tasks[0]["status"] == "queued"
130+
assert context_pack_artifact is None
131+
assert harness_request_artifact is None
132+
133+
134+
def test_evaluate_completion_governance_generates_context_pack_and_harness_request(tmp_path: Path) -> None:
135+
run_dir = tmp_path / "run-follow-up"
136+
_write_artifact(
137+
run_dir,
138+
"planning_worker_prompt_contracts.json",
139+
[
140+
{
141+
"prompt_contract_id": "worker-ctx",
142+
"assigned_agent": {"role": "WORKER", "agent_id": "agent-ctx"},
143+
"done_definition": {"acceptance_checks": ["repo_hygiene", "test_report"]},
144+
"continuation_policy": {
145+
"on_incomplete": "reply_auditor_reprompt_and_continue_same_session",
146+
"on_blocked": "spawn_independent_temporary_unblock_task",
147+
},
148+
}
149+
],
150+
)
151+
task_result = {
152+
"status": "FAILED",
153+
"summary": "policy gate blocked the run",
154+
"gates": {
155+
"diff_gate": {"passed": True},
156+
"policy_gate": {"passed": False, "violations": ["mcp_gate"]},
157+
"review_gate": {"passed": True},
158+
"tests_gate": {"passed": False},
159+
},
160+
"evidence_refs": {"thread_id": "thread-ctx"},
161+
}
162+
report, updated_unblock_tasks, context_pack_artifact, harness_request_artifact = evaluate_completion_governance(
163+
contract={"assigned_agent": {"role": "WORKER", "agent_id": "agent-ctx", "codex_thread_id": "thread-ctx"}},
164+
run_dir=run_dir,
165+
task_result=task_result,
166+
test_report={"status": "FAIL"},
167+
review_report={"verdict": "PASS"},
168+
status="FAILURE",
169+
failure_reason="context contamination while mcp_gate blocked the run",
170+
generated_at="2026-04-12T21:30:00Z",
171+
)
172+
173+
assert report["context_pack"]["status"] == "generated"
174+
assert context_pack_artifact is not None
175+
assert context_pack_artifact["trigger_reason"] == "contamination"
176+
assert report["harness_request"]["status"] == "approval_required"
177+
assert harness_request_artifact is not None
178+
assert harness_request_artifact["scope"] == "project-local"
179+
assert harness_request_artifact["approval_required"] is True
180+
assert updated_unblock_tasks is None

0 commit comments

Comments
 (0)