sf snapshot: uncommitted changes after 37m inactivity

2026-05-10 09:56:56 +02:00 · 2026-05-10 09:56:56 +02:00 · b2bcb922de
commit b2bcb922de
parent 7e8e3aa846
9 changed files with 107 additions and 12 deletions
--- a/.sf/backups/db/sf.db.2026-05-10T07-56-50-335Z
+++ b/.sf/backups/db/sf.db.2026-05-10T07-56-50-335Z
--- a/.sf/metrics.db
+++ b/.sf/metrics.db
--- a/.sf/model-performance.json
+++ b/.sf/model-performance.json
@ -41,6 +41,16 @@
      "lastUsed": "2026-05-10T02:32:24.115Z",
      "successRate": 1,
      "total": 3
+    },
+    "minimax/MiniMax-M2.7": {
+      "successes": 2,
+      "failures": 0,
+      "timeouts": 0,
+      "totalTokens": 6498562,
+      "totalCost": 2.22445842,
+      "lastUsed": "2026-05-10T07:26:04.505Z",
+      "successRate": 1,
+      "total": 2
    }
  },
  "discuss-milestone": {
--- a/src/resources/extensions/sf/auto/loop.js
+++ b/src/resources/extensions/sf/auto/loop.js
@ -21,6 +21,7 @@ import {
 	scheduleSidecarQueue,
 } from "../uok/execution-graph.js";
 import { resolveUokFlags } from "../uok/flags.js";
+import { clearRunawayRecoveredRuntimeRecords } from "../uok/unit-runtime.js";
 import { logWarning } from "../workflow-logger.js";
 import {
 	COOLDOWN_FALLBACK_WAIT_MS,
@ -422,6 +423,13 @@ async function runExitSolverEval(ctx, s, deps, iteration) {
 */
 export async function autoLoop(ctx, pi, s, deps) {
 	debugLog("autoLoop", { phase: "enter" });
+	const runawayHeal = clearRunawayRecoveredRuntimeRecords(s.basePath);
+	if (runawayHeal > 0) {
+		debugLog("autoLoop", {
+			phase: "runaway-runtime-heal",
+			cleared: runawayHeal,
+		});
+	}
 	let iteration = 0;
 	// Load persisted stuck state so counters survive session restarts (#3704)
 	const persisted = loadStuckState(s.basePath);
@ -480,7 +488,10 @@ export async function autoLoop(ctx, pi, s, deps) {
 				iteration,
 			});
 			if (s.isYolo()) {
-				logWarning("dispatch", `YOLO: loop at ${iteration} iterations — continuing past safety limit`);
+				logWarning(
+					"dispatch",
+					`YOLO: loop at ${iteration} iterations — continuing past safety limit`,
+				);
 			} else {
 				await deps.stopAuto(
 					ctx,
@ -502,7 +513,10 @@ export async function autoLoop(ctx, pi, s, deps) {
 					`Memory pressure: ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%) — stopping autonomous mode to prevent OOM kill`,
 				);
 				if (s.isYolo()) {
-					logWarning("dispatch", "YOLO: continuing despite memory pressure — OOM risk accepted");
+					logWarning(
+						"dispatch",
+						"YOLO: continuing despite memory pressure — OOM risk accepted",
+					);
 				} else {
 					await deps.stopAuto(
 						ctx,
@ -694,7 +708,10 @@ export async function autoLoop(ctx, pi, s, deps) {
 				});
 				if (guardsResult.action === "break") {
 					if (s.isYolo()) {
-						logWarning("dispatch", `YOLO: bypassing guard break for ${iterData.unitId}`);
+						logWarning(
+							"dispatch",
+							`YOLO: bypassing guard break for ${iterData.unitId}`,
+						);
 					} else {
 						finishTurn("stopped", "manual-attention", "guard-break");
 						break;
@ -983,7 +1000,10 @@ export async function autoLoop(ctx, pi, s, deps) {
 				deps.uokObserver?.onPhaseResult("guard", guardsResult.action);
 				if (guardsResult.action === "break") {
 					if (s.isYolo()) {
-						logWarning("dispatch", `YOLO: bypassing guard break for ${iterData.unitId}`);
+						logWarning(
+							"dispatch",
+							`YOLO: bypassing guard break for ${iterData.unitId}`,
+						);
 					} else {
 						finishTurn("stopped", "manual-attention", "guard-break");
 						break;
--- a/src/resources/extensions/sf/guided-flow.js
+++ b/src/resources/extensions/sf/guided-flow.js
@ -76,6 +76,7 @@ import { resolveUokFlags } from "./uok/flags.js";
 import { UokGateRunner } from "./uok/gate-runner.js";
 import { ensurePlanV2Graph as ensurePlanningFlowGraph } from "./uok/plan.js";
 import {
+	clearRunawayRecoveredRuntimeRecords,
 	clearUnitRuntimeRecord,
 	listUnitRuntimeRecords,
 } from "./uok/unit-runtime.js";
@ -1389,15 +1390,15 @@ async function dispatchDiscussForMilestone(
 */
 /**
 * Self-heal: scan runtime records and clear stale ones left behind when
- * autonomous mode crashed mid-unit. auto.ts has its own selfHealRuntimeRecords()
- * but guided-flow (manual /mode) never called it — meaning stale records
- * persisted until the next /autonomous run. This ensures the workflow entry
- * starts from a clean state regardless of how the previous session ended.
+ * autonomous mode crashed mid-unit. Recover `runaway-recovered` snapshots so
+ * `decideUnitRuntimeDispatch` stops blocking autonomous and guided resumes.
+ * Clearing at entry matches a fresh session boundary and avoids indefinite
+ * `runaway-recovery-reset-required` wedges.
 */
 function selfHealRuntimeRecords(basePath, ctx) {
 	try {
+		let cleared = clearRunawayRecoveredRuntimeRecords(basePath);
 		const records = listUnitRuntimeRecords(basePath);
-		let cleared = 0;
 		for (const record of records) {
 			const { unitType, unitId, phase } = record;
 			// Clear records whose expected artifact already exists (completed but not cleaned up)
--- a/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs
+++ b/src/resources/extensions/sf/tests/uok-unit-runtime.test.mjs
@ -10,6 +10,7 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, test } from "vitest";
 import {
+	clearRunawayRecoveredRuntimeRecords,
 	clearUnitRuntimeRecord,
 	decideUnitRuntimeDispatch,
 	getUnitRuntimeState,
@ -244,6 +245,30 @@ test("readUnitRuntimeRecord_returns_null_for_missing", () => {
 	assert.equal(record, null);
 });

+test("clearRunawayRecoveredRuntimeRecords_clears_only_runaway_terminal_rows", () => {
+	const root = makeProject();
+	const t0 = Date.now();
+	writeUnitRuntimeRecord(root, "discuss-milestone", "M001-X", t0, {
+		status: "runaway-recovered",
+		phase: "paused",
+		runawayGuardPause: { reason: "test pause" },
+	});
+	writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", t0, {
+		status: "failed",
+		phase: "timeout",
+	});
+	assert.equal(listUnitRuntimeRecords(root).length, 2);
+	assert.equal(clearRunawayRecoveredRuntimeRecords(root), 1);
+	const after = listUnitRuntimeRecords(root);
+	assert.equal(after.length, 1);
+	assert.equal(after[0].unitType, "execute-task");
+});
+
+test("clearRunawayRecoveredRuntimeRecords_returns_zero_when_dir_missing", () => {
+	const root = makeProject();
+	assert.equal(clearRunawayRecoveredRuntimeRecords(root), 0);
+});
+
 test("clearUnitRuntimeRecord_removes_file_and_cache", () => {
 	const root = makeProject();
 	writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", Date.now(), {
--- a/src/resources/extensions/sf/unit-runtime.js
+++ b/src/resources/extensions/sf/unit-runtime.js
@ -6,6 +6,7 @@
 * continue to work without changes.
 */
 export {
+	clearRunawayRecoveredRuntimeRecords,
 	clearUnitRuntimeRecord,
 	decideUnitRuntimeDispatch,
 	formatExecuteTaskRecoveryStatus,
--- a/src/resources/extensions/sf/uok/index.js
+++ b/src/resources/extensions/sf/uok/index.js
@ -198,6 +198,7 @@ export {
 } from "./unit-lineage.js";
 // ─── Unit Runtime ──────────────────────────────────────────────────────────
 export {
+	clearRunawayRecoveredRuntimeRecords,
 	clearUnitRuntimeRecord,
 	decideUnitRuntimeDispatch,
 	formatExecuteTaskRecoveryStatus,
--- a/src/resources/extensions/sf/uok/unit-runtime.js
+++ b/src/resources/extensions/sf/uok/unit-runtime.js
@ -449,6 +449,38 @@ export function clearUnitRuntimeRecord(basePath, unitType, unitId) {
 	_runtimeCache.delete(path);
 	if (existsSync(path)) unlinkSync(path);
 }
+/**
+ * Removes every unit-runtime file stuck in `runaway-recovered` so dispatch can resume.
+ *
+ * Purpose: runaway watchdog recovery leaves terminal records that
+ * `decideUnitRuntimeDispatch` intentionally blocks (`runaway-recovery-reset-required`)
+ * until an operator/session boundary clears them. Clearing at autonomous entry and
+ * guided-flow self-heal matches treating a new launch as that boundary so workflows
+ * are not wedged indefinitely.
+ *
+ * Consumer: autoLoop bootstrap and guided-flow `selfHealRuntimeRecords`.
+ *
+ * Returns: count of runtime files removed.
+ */
+export function clearRunawayRecoveredRuntimeRecords(basePath) {
+	let cleared = 0;
+	for (const record of listUnitRuntimeRecords(basePath)) {
+		const unitType = record.unitType;
+		const unitId = record.unitId;
+		if (
+			typeof unitType !== "string" ||
+			unitType.length === 0 ||
+			typeof unitId !== "string" ||
+			unitId.length === 0
+		) {
+			continue;
+		}
+		if (getUnitRuntimeState(record).status !== "runaway-recovered") continue;
+		clearUnitRuntimeRecord(basePath, unitType, unitId);
+		cleared++;
+	}
+	return cleared;
+}
 /**
 * Return all runtime records currently on disk for `basePath`.
 * Returns an empty array if the runtime directory does not exist.
@ -489,11 +521,16 @@ export async function inspectExecuteTaskDurability(basePath, unitId) {
 	if (isDbAvailable()) {
 		const taskRow = getTask(mid, sid, tid);
 		const taskStatus = taskRow?.status ?? "pending";
-		nextActionAdvanced = taskStatus !== "pending" && taskStatus !== "in_progress";
+		nextActionAdvanced =
+			taskStatus !== "pending" && taskStatus !== "in_progress";
 	} else {
 		const stateAbs = join(sfRoot(basePath), "STATE.md");
-		const stateContent = existsSync(stateAbs) ? readFileSync(stateAbs, "utf-8") : "";
-		nextActionAdvanced = !new RegExp(`Execute ${escapedTid}\\b`).test(stateContent);
+		const stateContent = existsSync(stateAbs)
+			? readFileSync(stateAbs, "utf-8")
+			: "";
+		nextActionAdvanced = !new RegExp(`Execute ${escapedTid}\\b`).test(
+			stateContent,
+		);
 	}
 	// Must-have coverage: load task plan and count mentions in summary
 	let mustHaveCount = 0;