sf snapshot: uncommitted changes after 30m inactivity

2026-05-10 03:21:24 +02:00 · 2026-05-10 03:21:24 +02:00 · 6b7d327672
commit 6b7d327672
parent 1a681caa86
14 changed files with 216 additions and 52 deletions
--- a/.sf/backups/db/sf.db.2026-05-10T00-25-03-634Z
+++ b/.sf/backups/db/sf.db.2026-05-10T00-25-03-634Z
--- a/.sf/backups/db/sf.db.2026-05-10T00-49-50-037Z
+++ b/.sf/backups/db/sf.db.2026-05-10T00-49-50-037Z
--- a/.sf/backups/db/sf.db.2026-05-10T01-08-44-324Z
+++ b/.sf/backups/db/sf.db.2026-05-10T01-08-44-324Z
--- a/.sf/metrics.db
+++ b/.sf/metrics.db
--- a/.sf/model-performance.json
+++ b/.sf/model-performance.json
@ -10,5 +10,27 @@
      "successRate": 1,
      "total": 4
    }
+  },
+  "plan-slice": {
+    "zai/glm-4.5": {
+      "successes": 1,
+      "failures": 0,
+      "timeouts": 0,
+      "totalTokens": 0,
+      "totalCost": 0,
+      "lastUsed": "2026-05-10T00:25:29.268Z",
+      "successRate": 1,
+      "total": 1
+    },
+    "minimax/MiniMax-M2.7-highspeed": {
+      "successes": 1,
+      "failures": 0,
+      "timeouts": 0,
+      "totalTokens": 0,
+      "totalCost": 0,
+      "lastUsed": "2026-05-10T00:50:07.124Z",
+      "successRate": 1,
+      "total": 1
+    }
  }
 }
--- a/src/resources/extensions/mcp-client/index.js
+++ b/src/resources/extensions/mcp-client/index.js
@ -287,6 +287,17 @@ function formatToolList(serverName, tools) {
 	return lines.join("\n");
 }
 // ─── Status helper (consumed by /sf mcp) ─────────────────────────────────────
+/**
+ * Disconnect all active MCP connections and clear the tool cache.
+ * Servers will lazily reconnect on the next mcp_discover or mcp_call.
+ *
+ * Purpose: allow /mcp reload to pick up config changes without a full restart.
+ * Consumer: /mcp reload command handler in commands-mcp-status.js.
+ */
+export async function disconnectAll() {
+	await closeAll();
+}
+
 /**
 * Return the live connection status for a named MCP server.
 * Safe to call even when the server has never been connected.
--- a/src/resources/extensions/sf-tui/footer.js
+++ b/src/resources/extensions/sf-tui/footer.js
@ -104,7 +104,7 @@ function getSessionStats(ctx) {
 }
 export function renderFooter(_theme, footerData, ctx, width) {
 	const git = refreshGitStatus(process.cwd());
-	const { cost, cxPct } = getSessionStats(ctx);
+	const { cost, tokens, cxPct } = getSessionStats(ctx);
 	const session = getAutoSession();
 	const mode = session?.getMode?.();
 	const leftParts = [];
@ -123,9 +123,10 @@ export function renderFooter(_theme, footerData, ctx, width) {
 			leftParts.push(chip("diff", `+${git.added}/-${git.deleted}`, "warning"));
 		}
 		if (git.ahead || git.behind) {
-			leftParts.push(
-				chip("sync", `${git.ahead} ahead ${git.behind} behind`, "warning"),
-			);
+			const syncParts = [];
+			if (git.ahead) syncParts.push(`↑${git.ahead}`);
+			if (git.behind) syncParts.push(`↓${git.behind}`);
+			leftParts.push(chip("sync", syncParts.join(" "), "warning"));
 		}
 		if (git.lastCommit) {
 			leftParts.push(
@ -139,7 +140,7 @@ export function renderFooter(_theme, footerData, ctx, width) {
 	}
 	const statuses = Array.from(footerData.getExtensionStatuses().entries())
 		.sort(([a], [b]) => a.localeCompare(b))
-		.map(([, text]) => text.trim())
+		.map(([, text]) => String(text ?? "").trim())
 		.filter(Boolean);
 	if (statuses.length) {
 		leftParts.push(chip("status", statuses.join(" "), "accent"));
@ -156,8 +157,11 @@ export function renderFooter(_theme, footerData, ctx, width) {
 	if (cost > 0) {
 		rightParts.push(chip("spent", `$${cost.toFixed(2)}`, "warning"));
 	}
-	const cxTone = cxPct >= 85 ? "error" : cxPct >= 60 ? "warning" : "success";
-	rightParts.push(chip("ctx", `${Math.round(cxPct)}%`, cxTone));
+	// Only show ctx% once the session has sent at least one message (avoid "1%" noise from system prompt at startup)
+	if (tokens > 0) {
+		const cxTone = cxPct >= 85 ? "error" : cxPct >= 60 ? "warning" : "success";
+		rightParts.push(chip("ctx", `${Math.round(cxPct)}%`, cxTone));
+	}
 	let rightLine = join(rightParts);
 	const maxRightWidth = Math.max(16, Math.floor(width * 0.55));
 	if (visibleWidth(rightLine) > maxRightWidth) {
@ -199,7 +203,7 @@ export function renderAutoFooter(_theme, footerData, ctx, width) {

 	const statuses = Array.from(footerData.getExtensionStatuses().entries())
 		.sort(([a], [b]) => a.localeCompare(b))
-		.map(([, text]) => text.trim())
+		.map(([, text]) => String(text ?? "").trim())
 		.filter(Boolean);
 	if (statuses.length) {
 		leftParts.push(ansiFg(SE.gray60, statuses.join(" ")));
--- a/src/resources/extensions/sf-tui/git.js
+++ b/src/resources/extensions/sf-tui/git.js
@ -38,7 +38,7 @@ function getLastCommit(cwd) {
 }
 function getDiffStats(cwd) {
 	try {
-		const raw = execFileSync("git", ["diff", "--stat"], {
+		const raw = execFileSync("git", ["diff", "HEAD", "--stat"], {
 			cwd,
 			encoding: "utf-8",
 			stdio: ["pipe", "pipe", "ignore"],
@ -48,10 +48,11 @@ function getDiffStats(cwd) {
 		let deleted = 0;
 		let modified = 0;
 		for (const line of raw.split("\n")) {
-			const m = line.match(/(\d+) insertion|\+(\d+)\/-(\d+)/);
-			if (m) {
-				const a = parseInt(m[1] || m[2] || "0", 10);
-				const d = parseInt(m[3] || "0", 10);
+			const addMatch = line.match(/(\d+) insertion/);
+			const delMatch = line.match(/(\d+) deletion/);
+			if (addMatch || delMatch) {
+				const a = addMatch ? parseInt(addMatch[1], 10) : 0;
+				const d = delMatch ? parseInt(delMatch[1], 10) : 0;
 				if (a) added += a;
 				if (d) deleted += d;
 				if (a || d) modified++;
--- a/src/resources/extensions/sf/auto/phases.js
+++ b/src/resources/extensions/sf/auto/phases.js
@ -36,6 +36,7 @@ import {
 } from "../auto-tool-tracking.js";
 import {
 	assessAutonomousSolverTurn,
+	appendAutonomousSolverCheckpoint,
 	beginAutonomousSolverIteration,
 	buildAutonomousSolverMissingCheckpointRepairPrompt,
 	buildAutonomousSolverPromptBlock,
@ -2362,12 +2363,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 		});
 	}
 	if (solverAssessment.action === "pause") {
-		const missingCheckpointDiagnosis =
-			solverAssessment.reason === "solver-missing-checkpoint"
-				? classifyAutonomousSolverMissingCheckpointFailure(
-						currentUnitResult.event?.messages ?? [],
-					)
-				: null;
+		const isMissingCheckpoint =
+			solverAssessment.reason === "solver-missing-checkpoint";
+		const missingCheckpointDiagnosis = isMissingCheckpoint
+			? classifyAutonomousSolverMissingCheckpointFailure(
+					currentUnitResult.event?.messages ?? [],
+				)
+			: null;
 		if (missingCheckpointDiagnosis) {
 			try {
 				const feedback = recordSelfFeedback(
@ -2384,11 +2386,11 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 							missingCheckpointDiagnosis.evidence ?? "",
 						].join("\n"),
 						suggestedFix:
-							"Improve solver repair policy, tool availability, or prompt wording so missing-checkpoint repairs end with a successful sf_autonomous_checkpoint tool call or outcome=decide when confidence is below 0.98.",
+							"Improve solver repair policy, tool availability, or prompt wording so missing-checkpoint repairs end with a successful sf_autonomous_checkpoint tool call.",
 						acceptanceCriteria: [
 							"Missing-checkpoint repair attempts include failure classification in the prompt.",
 							"Repeated repair failures file self-feedback automatically.",
-							"Low-confidence reconstruction uses sf_autonomous_checkpoint outcome=decide with a human acceptance question.",
+							"Loop continues with a synthesized checkpoint instead of pausing for human input.",
 						],
 						occurredIn: { unitType, unitId },
 						source: "runtime",
@ -2409,15 +2411,70 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 					},
 				});
 			} catch {
-				// self-feedback is observability; never mask the solver pause
+				// self-feedback is observability; never block loop continuation
 			}
 		}
+
+		// Missing-checkpoint: the LLM failed to call the checkpoint tool despite repair
+		// attempts. Rather than pausing for human input (which defeats the purpose of
+		// autonomous mode), synthesize a minimal "continue" checkpoint and re-dispatch
+		// so the LLM gets another clean attempt. The max-iterations guard will catch
+		// genuine infinite loops. Only hard blockers and max-iterations pause the loop.
+		if (isMissingCheckpoint) {
+			try {
+				appendAutonomousSolverCheckpoint(s.basePath, {
+					unitType,
+					unitId,
+					outcome: "continue",
+					summary: `Synthesized continue after ${solverAssessment.repairAttempts ?? "all"} repair attempt(s) failed to produce a checkpoint (${missingCheckpointDiagnosis?.classification ?? "unknown"}). Re-dispatching.`,
+					completedItems: [],
+					remainingItems: ["Retry unit — checkpoint was missing from prior run"],
+					verificationEvidence: ["synthesized-by-runtime"],
+					pdd: {
+						purpose: "Runtime-synthesized continue to avoid deadlock",
+						consumer: "autonomous loop",
+						contract: "continue",
+						failureBoundary: "max-iterations",
+						evidence: "none",
+						nonGoals: "none",
+						invariants: "none",
+						assumptions: "none",
+					},
+				});
+			} catch {
+				// Synthesis failed: notify, pause, and break here instead of re-dispatching
+				ctx.ui.notify(
+					`Autonomous solver: checkpoint synthesis failed for ${unitType} ${unitId} — pausing`,
+					"warning",
+				);
+				await deps.pauseAuto(ctx, pi);
+				return { action: "break", reason: solverAssessment.reason };
+			}
+			deps.emitJournalEvent({
+				ts: new Date().toISOString(),
+				flowId: ic.flowId,
+				seq: ic.nextSeq(),
+				eventType: "solver-missing-checkpoint-synthesized-continue",
+				data: {
+					unitType,
+					unitId,
+					repairAttempts: solverAssessment.repairAttempts,
+					classification: missingCheckpointDiagnosis?.classification,
+				},
+			});
+			ctx.ui.notify(
+				`Autonomous solver: all repair attempts exhausted for ${unitType} ${unitId} — synthesizing continue and re-dispatching (LLM will try again)`,
+				"info",
+			);
+			// Return "continue" now (no fall-through to the pause path below) so the
+			// loop can re-dispatch the unit; the synthesized checkpoint records the outcome.
+			return { action: "continue" };
+		}
+
 		const reason =
-			solverCheckpoint?.outcome === "decide"
-				? (solverCheckpoint.decisionQuestion ?? solverCheckpoint.summary)
-				: solverCheckpoint?.outcome === "blocked"
-					? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary)
-					: solverAssessment.reason;
+			solverCheckpoint?.outcome === "blocked"
+				? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary)
+				: solverAssessment.reason;
 		deps.emitJournalEvent({
 			ts: new Date().toISOString(),
 			flowId: ic.flowId,
@ -2434,7 +2491,6 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
 				maxIterations: solverAssessment.state?.maxIterations,
 				remainingItems: solverCheckpoint?.remainingItems ?? [],
 				evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
-				...(missingCheckpointDiagnosis ? { missingCheckpointDiagnosis } : {}),
 			},
 		});
 		ctx.ui.notify(
--- a/src/resources/extensions/sf/autonomous-solver.js
+++ b/src/resources/extensions/sf/autonomous-solver.js
@ -230,7 +230,7 @@ export function buildAutonomousSolverPromptBlock(state) {
 		'- `outcome: "complete"` only when this unit\'s normal completion tool/artifact is also done.',
 		'- `outcome: "continue"` when you made real progress but more autonomous iterations are needed.',
 		'- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment.',
-		'- `outcome: "decide"` when there is a material product/architecture choice that must not be decided autonomously.',
+		'- `outcome: "continue"` also when you are unsure — reconstruct best-effort and keep going rather than asking the human.',
 		"",
 		"Checkpoint the eight PDD fields every time:",
 		"- Purpose: why this behavior exists and what value it protects.",
@ -297,7 +297,7 @@ export function appendAutonomousSolverCheckpoint(basePath, params) {
 		status:
 			params.outcome === "complete"
 				? "complete"
-				: params.outcome === "blocked" || params.outcome === "decide"
+				: params.outcome === "blocked"
 					? "paused"
 					: "running",
 		updatedAt: checkpoint.ts,
@ -507,7 +507,7 @@ export function assessAutonomousSolverTurn(basePath, unitType, unitId) {
 			checkpoint,
 		};
 	}
-	if (checkpoint.outcome === "blocked" || checkpoint.outcome === "decide") {
+	if (checkpoint.outcome === "blocked") {
 		return {
 			action: "pause",
 			reason: `solver-${checkpoint.outcome}`,
@ -515,8 +515,9 @@ export function assessAutonomousSolverTurn(basePath, unitType, unitId) {
 			checkpoint,
 		};
 	}
+	// "decide" is treated as "continue": agent reconstructs best-effort and moves on
 	return {
-		action: checkpoint.outcome === "continue" ? "continue" : "complete",
+		action: checkpoint.outcome === "continue" || checkpoint.outcome === "decide" ? "continue" : "complete",
 		reason: `solver-${checkpoint.outcome}`,
 		state,
 		checkpoint,
@ -657,15 +658,16 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
 			"2. List files in the milestone/slice/task directories to find what artifacts exist.",
 			"3. Read any SUMMARY.md or PLAN.md files to understand what progress was made.",
 			"4. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.",
-			"5. **Important**: If you cannot determine what happened with high confidence (≥0.98), use outcome='decide' and ask the human what the checkpoint should contain.",
+			"5. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.",
+			"6. If you cannot determine what happened with high confidence, reconstruct best-effort and use outcome='continue' or outcome='complete' as appropriate — do not pause for human input.",
 		);
 		lines.push(
 			"",
 			"**Low-confidence reconstruction guidance**:",
-			"- Use outcome='decide' when evidence is sparse or ambiguous (confidence < 0.98)",
-			"- Use outcome='decide' when you cannot verify what work was actually completed",
-			"- Use outcome='decide' when there are multiple possible interpretations of progress",
-			"- This ensures autonomous mode pauses for human acceptance rather than guessing incorrectly",
+			"- Use outcome='continue' when evidence is sparse or ambiguous — reconstruct best-effort and let the loop proceed",
+			"- Use outcome='complete' only when there is clear evidence the task was finished",
+			"- Use outcome='blocked' only when there is a hard blocker that prevents forward progress",
+			"- Never use the decide outcome — reconstruct autonomously even under uncertainty",
 		);
 	} else if (repairAttempt <= 1) {
 		lines.push("Do not continue implementation work in this repair turn.");
@ -686,15 +688,15 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
 	}
 	if (repairAttempt >= 3) {
 		lines.push(
-			'If your confidence that the reconstructed checkpoint is correct is below 0.98, call sf_autonomous_checkpoint with outcome="decide" and put the human acceptance question in decisionQuestion.',
+			"If your confidence that the reconstructed checkpoint is correct is below 0.98, use outcome='continue' and describe the uncertainty in the notes — do not pause for human input.",
 		);
 	}
 	if (repairAttempt >= maxRepairAttempts) {
 		lines.push(
-			'This is the final automatic repair attempt. Prefer outcome="decide" over guessing; autonomous mode will pause with your decision question for human acceptance.',
+			"This is the final automatic repair attempt. Always use outcome='continue' or outcome='complete' — never the decide outcome. Reconstruct best-effort and let the autonomous loop continue.",
 		);
 		lines.push(
-			'**Final guidance**: If there is any doubt about the correctness of the checkpoint, use outcome="decide" with a clear question asking the human to specify the correct state.',
+			"**Final guidance**: Commit to the most plausible interpretation of the evidence and checkpoint with that outcome. Do not pause for human review.",
 		);
 	}
 	lines.push(
--- a/src/resources/extensions/sf/commands-mcp-status.js
+++ b/src/resources/extensions/sf/commands-mcp-status.js
@ -100,7 +100,7 @@ export function formatMcpServerDetail(server) {
 }
 // ─── Command handler ────────────────────────────────────────────────────────
 /**
- * Handle `/mcp [status|check <server>]`.
+ * Handle `/mcp [status|check <server>|reload]`.
 */
 export async function handleMcpStatus(args, ctx) {
 	const trimmed = args.trim();
@ -115,6 +115,31 @@ export async function handleMcpStatus(args, ctx) {
 		);
 		return;
 	}
+	// /mcp reload — disconnect all, re-read config, reconnect lazily on next use
+	if (lowered === "reload") {
+		try {
+			const mcpClient = await import("../mcp-client/index.js");
+			if (typeof mcpClient.disconnectAll === "function") {
+				await mcpClient.disconnectAll();
+				const fresh = readMcpConfigs();
+				ctx.ui.notify(
+					`MCP servers reloaded — ${fresh.length} server(s) configured. Connections will re-establish on next use.\n\n${fresh.map((s) => `  ○ ${s.name} (${s.transport})`).join("\n") || "  (none)"}`,
+					"info",
+				);
+			} else {
+				ctx.ui.notify(
+					"MCP client does not support hot-reload. Use /reload to restart the extension layer.",
+					"warning",
+				);
+			}
+		} catch {
+			ctx.ui.notify(
+				"Failed to reload MCP servers. Config may be invalid — check .mcp.json or .sf/mcp.json.",
+				"error",
+			);
+		}
+		return;
+	}
 	// /mcp check <server>
 	if (lowered.startsWith("check ")) {
 		const serverName = trimmed.slice("check ".length).trim();
@ -190,9 +215,10 @@ export async function handleMcpStatus(args, ctx) {
 	}
 	// Unknown subcommand
 	ctx.ui.notify(
-		"Usage: /mcp [status|check <server>]\n\n" +
+		"Usage: /mcp [status|check <server>|reload]\n\n" +
 			"  status           Show all MCP server statuses (default)\n" +
-			"  check <server>   Detailed status for a specific server",
+			"  check <server>   Detailed status for a specific server\n" +
+			"  reload           Disconnect all servers and re-read config (no restart needed)",
 		"warning",
 	);
 }
--- a/src/resources/extensions/sf/commands/catalog.js
+++ b/src/resources/extensions/sf/commands/catalog.js
@ -152,7 +152,7 @@ export const TOP_LEVEL_SUBCOMMANDS = [
 		desc: "Switch to repair work mode and run diagnostics [--autonomous]",
 	},
 	{ cmd: "tasks", desc: "Background work surface — units, workers, budget" },
-	{ cmd: "skills", desc: "List discovered skills from .agents/skills/" },
+	{ cmd: "skills", desc: "List discovered skills from .agents/skills/ [reload|--eval|--auto-create]" },
 	{
 		cmd: "uok",
 		desc: "UOK runtime health: ledger, last run, last error, startup gate, gate metrics",
@ -461,6 +461,10 @@ const NESTED_COMPLETIONS = {
 	mcp: [
 		{ cmd: "status", desc: "Show all MCP server statuses (default)" },
 		{ cmd: "check", desc: "Detailed status for a specific server" },
+		{
+			cmd: "reload",
+			desc: "Disconnect all MCP servers and re-read config — no restart needed",
+		},
 	],
 	doctor: [
 		{ cmd: "fix", desc: "Auto-fix detected issues" },
--- a/src/resources/extensions/sf/commands/handlers/core.js
+++ b/src/resources/extensions/sf/commands/handlers/core.js
@ -73,7 +73,7 @@ export function showHelp(ctx, args = "") {
 		"  /doctor         Diagnose and repair .sf/ state",
 		"  /repair         Switch to repair work mode and run diagnostics",
 		"  /tasks          Background work surface",
-		"  /skills         List discovered skills",
+		"  /skills         List discovered skills  [reload|--eval <name>|--auto-create]",
 		"  /cost           Show cost summary [--session|--all|--prometheus]",
 		"",
 		"Use /help all for the complete command reference.",
@ -140,13 +140,14 @@ export function showHelp(ctx, args = "") {
 		"  /hooks          Show post-unit hook configuration",
 		"  /extensions     Manage extensions  [list|enable|disable|info]",
 		"  /fast           Toggle OpenAI service tier  [on|off|flex|status]",
-		"  /mcp            External MCP server status  [status|check <server>]",
+		"  /mcp            External MCP server status  [status|check <server>|reload]",
 		"",
 		"MAINTENANCE",
 		"  /doctor         Diagnose and repair .sf/ state  [audit|fix|heal] [scope]",
 		"  /repair         Switch to repair work mode and run diagnostics  [--autonomous]",
 		"  /tasks          Background work surface  [--refresh|--failed|--cancelled|--all]",
 		"  /skills         List discovered skills from .agents/skills/",
+		"  /skills reload  Reload skills from disk — picks up new/updated skill files",
 		"  /skills --eval <name>  Run eval cases for a skill",
 		"  /reload         Snapshot & reload agent, resume same session",
 		"  /export         Export milestone/slice results  [--json|--markdown|--html] [--all]",
@ -687,6 +688,16 @@ export async function handleCoreCommand(trimmed, ctx, pi) {
 	}
 	if (trimmed === "skills" || trimmed.startsWith("skills ")) {
 		const args = trimmed.replace(/^skills\s*/, "").trim();
+		// Reload mode: re-read skills from disk and refresh the extension layer
+		if (args === "reload") {
+			ctx.ui.notify("Reloading skills from disk...", "info");
+			await ctx.reload();
+			ctx.ui.notify(
+				"Skills reloaded. New and updated skill files are now active.",
+				"info",
+			);
+			return true;
+		}
 		// Auto-create mode: detect patterns and generate skills
 		if (args === "--auto-create" || args === "-a") {
 			const {
--- a/src/resources/extensions/sf/tests/autonomous-solver.test.mjs
+++ b/src/resources/extensions/sf/tests/autonomous-solver.test.mjs
@ -38,7 +38,7 @@ function pdd(overrides = {}) {
 		contract:
 			"Checkpoint contains outcome, progress, evidence, and remaining work.",
 		failureBoundary:
-			"Blocked or decide outcomes pause instead of continuing blind.",
+			"Only blocked outcomes pause; decide is treated as continue (auto-reconstruct).",
 		evidence: "Projection and JSONL history are written.",
 		nonGoals: "Does not replace the normal task completion tool.",
 		invariants: "Each checkpoint is tied to one unit id.",
@ -129,7 +129,8 @@ describe("autonomous solver", () => {
 		expect(prompt).toContain("Purpose:");
 		expect(prompt).toContain("Consumer:");
 		expect(prompt).toContain("Failure boundary:");
-		expect(prompt).toContain('outcome: "decide"');
+		expect(prompt).not.toContain('outcome: "decide"');
+		expect(prompt).toContain("reconstruct best-effort");
 	});

 	test("buildAutonomousSolverMissingCheckpointRepairPrompt_rejects_file_substitutes", () => {
@ -145,7 +146,7 @@ describe("autonomous solver", () => {
 		expect(prompt).toContain("final action");
 	});

-	test("buildAutonomousSolverMissingCheckpointRepairPrompt_escalates_to_confidence_gated_decide", () => {
+	test("buildAutonomousSolverMissingCheckpointRepairPrompt_escalates_to_autonomous_reconstruct", () => {
 		const prompt = buildAutonomousSolverMissingCheckpointRepairPrompt(
 			{ iteration: 2 },
 			"research-slice",
@ -158,8 +159,8 @@ describe("autonomous solver", () => {
 		expect(prompt).toContain("Repair attempt: 3 of 4");
 		expect(prompt).toContain("confidence");
 		expect(prompt).toContain("0.98");
-		expect(prompt).toContain('outcome="decide"');
-		expect(prompt).toContain("decisionQuestion");
+		expect(prompt).not.toContain('outcome="decide"');
+		expect(prompt).toContain("outcome='continue'");
 	});

 	test("assessAutonomousSolverTurn_missing_checkpoint_escalates_repairs_then_pauses", () => {
@ -243,6 +244,31 @@ describe("autonomous solver", () => {
 		expect(blocked.reason).toBe("solver-blocked");
 	});

+	test("assessAutonomousSolverTurn_decide_continues_instead_of_pausing", () => {
+		// "decide" outcome was previously a human-in-the-loop escape hatch.
+		// Policy change: treat "decide" as "continue" — auto-reconstruct best-effort.
+		const project = makeProject();
+		beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
+		appendAutonomousSolverCheckpoint(project, {
+			unitType: "execute-task",
+			unitId: "M001/S01/T01",
+			outcome: "decide",
+			summary: "Low confidence — reconstructed best-effort.",
+			completedItems: ["Analysis done"],
+			remainingItems: [],
+			verificationEvidence: ["artifacts match expectations"],
+			pdd: pdd(),
+		});
+		const result = assessAutonomousSolverTurn(
+			project,
+			"execute-task",
+			"M001/S01/T01",
+		);
+		// Must not pause — the loop should continue autonomously
+		expect(result.action).not.toBe("pause");
+		expect(result.action).toBe("continue");
+	});
+
 	test("assessAutonomousSolverTurn_max_iterations_pauses_before_unbounded_retry", () => {
 		const project = makeProject();
 		beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01", {
@ -298,7 +324,8 @@ describe("autonomous solver", () => {
 		expect(prompt).toContain("No transcript was captured");
 		expect(prompt).toContain(".sf/runtime/autonomous-solver/LOOP.md");
 		expect(prompt).toContain("SUMMARY.md");
-		expect(prompt).toContain("outcome='decide'");
+		expect(prompt).not.toContain("outcome='decide'");
+		expect(prompt).toContain("outcome='continue'");
 	});

 	test("getConfiguredAutonomousSolverMaxIterations_clamps_preference", () => {