sf snapshot: uncommitted changes after 33m inactivity

2026-05-10 07:54:07 +02:00 · 2026-05-10 07:54:07 +02:00 · 529138db9a
commit 529138db9a
parent 7085ad850d
12 changed files with 199 additions and 68 deletions
--- a/.sf/backups/db/sf.db.2026-05-10T04-45-58-550Z
+++ b/.sf/backups/db/sf.db.2026-05-10T04-45-58-550Z
--- a/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z
+++ b/.sf/backups/db/sf.db.2026-05-10T05-22-28-577Z
--- a/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z
+++ b/.sf/backups/db/sf.db.2026-05-10T05-37-52-529Z
--- a/.sf/graphs/graph.json
+++ b/.sf/graphs/graph.json
@ -0,0 +1,64 @@
+{
+  "nodes": [
+    {
+      "id": "concept:phase:planning",
+      "label": "Phase: planning",
+      "type": "concept",
+      "confidence": "EXTRACTED",
+      "sourceFile": "STATE.md"
+    },
+    {
+      "id": "milestone:M001",
+      "label": "M001",
+      "type": "milestone",
+      "confidence": "EXTRACTED"
+    },
+    {
+      "id": "slice:M001:S01",
+      "label": "S01: Recover corrupted DB from backup",
+      "type": "slice",
+      "confidence": "EXTRACTED",
+      "sourceFile": "milestones/M001/slices/S01/S01-PLAN.md"
+    },
+    {
+      "id": "slice:M001:S02",
+      "label": "S02: Execute S01-T01 — CLI/help doctrine fix",
+      "type": "slice",
+      "confidence": "EXTRACTED",
+      "sourceFile": "milestones/M001/slices/S02/S02-PLAN.md"
+    },
+    {
+      "id": "milestone:M002",
+      "label": "M002",
+      "type": "milestone",
+      "confidence": "EXTRACTED"
+    }
+  ],
+  "edges": [
+    {
+      "from": "milestone:M001",
+      "to": "slice:M001:S01",
+      "type": "contains",
+      "confidence": "EXTRACTED"
+    },
+    {
+      "from": "milestone:M001",
+      "to": "slice:M001:S02",
+      "type": "contains",
+      "confidence": "EXTRACTED"
+    },
+    {
+      "from": "milestone:M001",
+      "to": "slice:M001:S01",
+      "type": "contains",
+      "confidence": "EXTRACTED"
+    },
+    {
+      "from": "milestone:M001",
+      "to": "slice:M001:S02",
+      "type": "contains",
+      "confidence": "EXTRACTED"
+    }
+  ],
+  "builtAt": "2026-05-10T05:08:17.329Z"
+}
--- a/.sf/metrics.db
+++ b/.sf/metrics.db
--- a/.sf/metrics.db-shm
+++ b/.sf/metrics.db-shm
--- a/.sf/metrics.db-wal
+++ b/.sf/metrics.db-wal
--- a/.sf/model-performance.json
+++ b/.sf/model-performance.json
@ -44,5 +44,27 @@
      "successRate": 1,
      "total": 2
    }
+  },
+  "run-uat": {
+    "minimax/MiniMax-M2.7-highspeed": {
+      "successes": 1,
+      "failures": 0,
+      "timeouts": 0,
+      "totalTokens": 0,
+      "totalCost": 0,
+      "lastUsed": "2026-05-10T05:22:57.604Z",
+      "successRate": 1,
+      "total": 1
+    },
+    "google-gemini-cli/gemini-3.1-pro-preview": {
+      "successes": 2,
+      "failures": 0,
+      "timeouts": 0,
+      "totalTokens": 1700534,
+      "totalCost": 0.14063507999999997,
+      "lastUsed": "2026-05-10T05:40:27.616Z",
+      "successRate": 1,
+      "total": 2
+    }
  }
 }
--- a/src/resources/extensions/sf/auto.js
+++ b/src/resources/extensions/sf/auto.js
@ -1399,6 +1399,26 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
 		debugLog("startAuto", { phase: "already-active", skipping: true });
 		return;
 	}
+	// ── Command context guard ────────────────────────────────────────────────
+	// Autonomous mode requires a ctx with newSession() to start clean sessions
+	// for each unit. Shortcut handlers (Ctrl+Y, registerShortcut) receive an
+	// ExtensionContext which does NOT have newSession. Fall back to the last
+	// known command ctx if available; otherwise block with an actionable message.
+	if (typeof ctx.newSession !== "function") {
+		if (typeof s.lastCommandCtx?.newSession === "function") {
+			ctx = s.lastCommandCtx;
+		} else {
+			ctx.ui.notify(
+				"Autonomous mode requires a command context with newSession. Run /autonomous once first, then use the keyboard shortcut.",
+				"warning",
+			);
+			debugLog("startAuto", { phase: "no-command-ctx", skipping: true });
+			return;
+		}
+	} else {
+		// Cache the valid command ctx for future shortcut-triggered starts.
+		s.lastCommandCtx = ctx;
+	}
 	// Gate: if the user is in Ask mode (manual runControl and not already in
 	// build workMode), ask permission to switch to Build mode.
 	// Skip if workMode is already "build" — runControl is reset to "manual" on
@ -1901,10 +1921,14 @@ export async function dispatchHookUnit(
 	targetBasePath,
 ) {
 	if (!s.active) {
+		// Guard: ctx from hook/shortcut callers may lack newSession(); fall back to cached command ctx.
+		const hookCtx = typeof ctx.newSession === "function"
+			? ctx
+			: (typeof s.lastCommandCtx?.newSession === "function" ? s.lastCommandCtx : ctx);
 		s.active = true;
 		s.stepMode = true;
 		s.runControl = "assisted";
-		s.cmdCtx = ctx;
+		s.cmdCtx = hookCtx;
 		s.basePath = targetBasePath;
 		s.autoStartTime = Date.now();
 		s.currentUnit = null;
@ -1917,6 +1941,14 @@ export async function dispatchHookUnit(
 		id: triggerUnitId,
 		startedAt: hookStartedAt,
 	};
+	if (typeof s.cmdCtx?.newSession !== "function") {
+		ctx.ui.notify(
+			`Hook dispatch for ${hookName} failed: no command context with newSession available. Run /autonomous once first.`,
+			"error",
+		);
+		await stopAuto(ctx, pi);
+		return false;
+	}
 	const result = await s.cmdCtx.newSession();
 	if (result.cancelled) {
 		await stopAuto(ctx, pi);
--- a/src/resources/extensions/sf/auto/run-unit.js
+++ b/src/resources/extensions/sf/auto/run-unit.js
@ -75,6 +75,21 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
 	// keepSession=false (default): start a clean session for each new unit.
 	if (!keepSession) {
 		debugLog("runUnit", { phase: "session-create", unitType, unitId });
+		// Guard: s.cmdCtx must have newSession() (ExtensionCommandContext). If it
+		// doesn't, autonomous mode was started from a shortcut handler without a
+		// valid command ctx — startAuto() should have caught this, but defend here
+		// as a last resort so we get a clear error category instead of a TypeError.
+		if (typeof s.cmdCtx?.newSession !== "function") {
+			return {
+				status: "cancelled",
+				errorContext: {
+					message:
+						"cmdCtx.newSession is not available — autonomous mode was started from a non-command context. Run /autonomous once to establish a command context.",
+					category: "session-failed",
+					isTransient: false,
+				},
+			};
+		}
 		let sessionResult;
 		let sessionTimeoutHandle;
 		const mySessionSwitchGeneration = ++sessionSwitchGeneration;
--- a/src/resources/extensions/sf/auto/session.js
+++ b/src/resources/extensions/sf/auto/session.js
@ -137,6 +137,18 @@ export class AutoSession {
 	activeEngineId = null;
 	activeRunDir = null;
 	cmdCtx = null;
+	/**
+	 * Last known ExtensionCommandContext that had newSession().
+	 *
+	 * Purpose: allow autonomous mode to start from non-command contexts (shortcut
+	 * handlers, event handlers) by falling back to a previously cached command
+	 * context. newSession() is bound at registration time and remains valid across
+	 * session switches, so this is safe to reuse after /clear.
+	 *
+	 * NOT in reset() — intentionally persists across auto-stop/restart cycles so
+	 * Ctrl+Y and similar shortcuts work after the loop exits cleanly.
+	 */
+	lastCommandCtx = null;
 	/**
 	 * YOLO mode: build + autonomous + deep + unrestricted.
 	 * Tracks the local toggle state so the terminal title and status display
--- a/src/resources/extensions/sf/bootstrap/db-tools.js
+++ b/src/resources/extensions/sf/bootstrap/db-tools.js
@ -83,16 +83,15 @@ export function registerDbTools(pi) {
 		name: "save_decision",
 		label: "Save Decision",
 		description:
-			"Record a project decision to the SF database and regenerate DECISIONS.md. " +
-			"Decision IDs are auto-assigned — never provide an ID manually.",
+			"Record an architectural or technical decision and return its auto-assigned ID (e.g. D001). " +
+			"Call this whenever a non-trivial choice is made about architecture, libraries, patterns, or observability so the rationale is durable and reviewable.",
 		promptSnippet:
-			"Record a project decision to the SF database (auto-assigns ID, regenerates DECISIONS.md)",
+			"Record a project decision (auto-assigns ID, regenerates DECISIONS.md)",
 		promptGuidelines: [
-			"Use save_decision when recording an architectural, pattern, library, or observability decision.",
-			"Decision IDs are auto-assigned (D001, D002, ...) — never guess or provide an ID.",
-			"All fields except revisable, when_context, and made_by are required.",
-			"The tool writes to the DB and regenerates .sf/DECISIONS.md automatically.",
-			"Set made_by to 'human' when the user explicitly directed the decision, 'agent' when the LLM chose autonomously (default), or 'collaborative' when it was discussed and agreed together.",
+			"Call save_decision for architectural, library, pattern, or observability choices — not for task-level implementation details.",
+			"Decision IDs are auto-assigned — never guess or provide one.",
+			"scope, decision, choice, and rationale are required; revisable, when_context, and made_by are optional.",
+			"Set made_by to 'human' when the user directed it, 'agent' when you chose autonomously, or 'collaborative' when agreed together.",
 		],
 		parameters: Type.Object({
 			scope: Type.String({
@ -211,15 +210,14 @@ export function registerDbTools(pi) {
 		name: "update_requirement",
 		label: "Update Requirement",
 		description:
-			"Update an existing requirement in the SF database and regenerate REQUIREMENTS.md. " +
-			"Provide the requirement ID (e.g. R001) and any fields to update.",
+			"Update an existing requirement by ID and return confirmation — only fields you provide are changed. " +
+			"Call this when a requirement's status, validation evidence, description, or owning slice changes after it was first recorded.",
 		promptSnippet:
-			"Update an existing SF requirement by ID (regenerates REQUIREMENTS.md)",
+			"Update an existing requirement by ID (only provided fields are changed)",
 		promptGuidelines: [
-			"Use update_requirement to change status, validation, notes, or other fields on an existing requirement.",
-			"The id parameter is required — it must be an existing RXXX identifier.",
-			"All other fields are optional — only provided fields are updated.",
-			"The tool verifies the requirement exists before updating.",
+			"id is required and must be an existing requirement identifier (e.g. R001).",
+			"All other fields are optional — only the fields you provide are updated.",
+			"Use this to mark a requirement validated, deferred, or to correct its description after new evidence.",
 		],
 		parameters: Type.Object({
 			id: Type.String({ description: "The requirement ID (e.g. R001, R014)" }),
@ -326,15 +324,14 @@ export function registerDbTools(pi) {
 		name: "save_requirement",
 		label: "Save Requirement",
 		description:
-			"Record a new requirement to the SF database and regenerate REQUIREMENTS.md. " +
-			"Requirement IDs are auto-assigned — never provide an ID manually.",
+			"Record a new requirement and return its auto-assigned ID (e.g. R001). " +
+			"Call this when a functional, non-functional, or operational requirement is identified that the project must satisfy.",
 		promptSnippet:
-			"Record a new SF requirement to the database (auto-assigns ID, regenerates REQUIREMENTS.md)",
+			"Record a new requirement (auto-assigns ID, regenerates REQUIREMENTS.md)",
 		promptGuidelines: [
-			"Use save_requirement when recording a new functional, non-functional, or operational requirement.",
-			"Requirement IDs are auto-assigned (R001, R002, ...) — never guess or provide an ID.",
-			"class, description, why, and source are required. All other fields are optional.",
-			"The tool writes to the DB and regenerates .sf/REQUIREMENTS.md automatically.",
+			"Requirement IDs are auto-assigned — never guess or provide one.",
+			"class, description, why, and source are required; all other fields are optional.",
+			"Use update_requirement to change an existing requirement rather than saving a duplicate.",
 		],
 		parameters: Type.Object({
 			class: Type.String({
@ -399,16 +396,14 @@ export function registerDbTools(pi) {
 		name: "save_summary",
 		label: "Save Summary",
 		description:
-			"Save a summary, research, context, or assessment artifact to the SF database and write it to disk. " +
-			"Computes the file path from milestone/slice/task IDs automatically.",
+			"Save a research, summary, context, or assessment artifact to disk with an auto-computed path. " +
+			"Call this to persist planning or research output (e.g. a research brief, context doc, or summary) for a milestone, slice, or task.",
 		promptSnippet:
-			"Save a SF artifact (summary/research/context/assessment) to DB and disk",
+			"Save a planning artifact (SUMMARY/RESEARCH/CONTEXT/ASSESSMENT) to disk",
 		promptGuidelines: [
-			"Use save_summary to persist structured artifacts (SUMMARY, RESEARCH, CONTEXT, ASSESSMENT, CONTEXT-DRAFT).",
-			"milestone_id is required. slice_id and task_id are optional — they determine the file path.",
-			"The tool computes the relative path automatically: milestones/M001/M001-SUMMARY.md, milestones/M001/slices/S01/S01-SUMMARY.md, etc.",
+			"milestone_id is required; slice_id and task_id are optional and determine the file path.",
 			"artifact_type must be one of: SUMMARY, RESEARCH, CONTEXT, ASSESSMENT, CONTEXT-DRAFT.",
-			"Use CONTEXT-DRAFT for incremental draft persistence; use CONTEXT for the final milestone context after depth verification.",
+			"Use CONTEXT-DRAFT for incremental saves; use CONTEXT only for the final milestone context after verification.",
 		],
 		parameters: Type.Object({
 			milestone_id: Type.String({ description: "Milestone ID (e.g. M001)" }),
@ -528,15 +523,14 @@ export function registerDbTools(pi) {
 		name: "new_milestone_id",
 		label: "Generate Milestone ID",
 		description:
-			"Generate the next sequential milestone ID and return it as a string. " +
-			"Always call this when creating a new milestone — never invent or hardcode milestone IDs manually.",
+			"Generate and reserve the next milestone ID (e.g. M001) and return it as a string. " +
+			"Always call this before creating a new milestone — never invent or hardcode a milestone ID.",
 		promptSnippet:
-			"Generate a valid milestone ID (respects unique_milestone_ids preference)",
+			"Generate a valid milestone ID before creating a new milestone",
 		promptGuidelines: [
-			"ALWAYS call new_milestone_id before creating a new milestone directory or writing milestone files.",
-			"Never invent or hardcode milestone IDs like M001, M002 — always use this tool.",
-			"Call it once per milestone you need to create. For multi-milestone projects, call it once for each milestone in sequence.",
-			"The tool returns the correct format based on project preferences (e.g. M001 or M001-r5jzab).",
+			"Call new_milestone_id once per new milestone, before any other milestone creation steps.",
+			"Never hardcode milestone IDs like M001 or M002 — always use this tool.",
+			"For multiple milestones, call it once per milestone in sequence.",
 		],
 		parameters: Type.Object({}),
 		execute: milestoneGenerateIdExecute,
@ -638,22 +632,17 @@ export function registerDbTools(pi) {
 		name: "report_issue",
 		label: "Self Report",
 		description:
-			"Record an observation about the agent tooling itself — a bug, missing feature, confusing prompt, friction, or improvement idea — for future review. " +
-			"Over-reporting is preferred; duplicates are resolved later. " +
-			"Do not use this for bugs in the user's project — only for observations about the agent tooling itself.",
+			"File an observation about the agent tooling itself — a bug, confusing prompt, missing feature, friction, or improvement idea — and return the new entry ID. " +
+			"Use this whenever you notice something wrong or suboptimal about how the agent tooling behaves, not for bugs in the user's project.",
 		promptSnippet:
-			"Report any sf-internal observation: bug, missing feature, prompt issue, idea, friction",
+			"Report any agent-tooling observation: bug, prompt issue, missing feature, or improvement idea",
 		promptGuidelines: [
-			"Use report_issue for ANY sf-internal observation — not just bugs. Acceptable kinds include: 'prompt-quality-issue' (you found a prompt ambiguous, contradictory, or missing context), 'improvement-idea' (a non-bug enhancement that would help), 'agent-friction' (workflow friction you worked around), 'design-thought' (broader speculation), 'missing-feature' (capability you wished sf had), as well as classic bug kinds like 'brittle-predicate' or 'git-empty-pathspec'.",
-			"Do NOT use this for bugs in the user's project, for your own task work, or to track your task's todo list. ONLY for observations about sf-the-tool itself.",
-			"This tool FILES new entries; it does not resolve existing ones. High/critical forge self-feedback may be queued autonomously at startup or an idle turn boundary as repair work. Use resolve_issue after fixing an entry; do not hand-edit the JSONL.",
-			"Over-reporting is preferred to under-reporting at this stage. If you noticed it about sf, file it. Dedup and threshold-to-roadmap promotion are tracked as their own self-feedback items and will eventually clean noise.",
-			"Severity guide: low = cosmetic / nice-to-have / improvement idea. medium = noisy or imperfect or recurring friction. high = blocked the unit (sf-the-tool prevented you from completing the task). critical = needs immediate fix (currently treated as high until inline-fix dispatch lands).",
-			"high/critical entries mark the originating unit as blocked: it will not seal as success, and will be re-queued only after sf is bumped past the recorded version.",
-			"Provide concrete evidence — log excerpt, command, file path, error message, the literal prompt text that confused you, etc. Vague reports are not actionable; specific ones are.",
-			"If you have a hypothesis about the fix, include it as suggested_fix. Even a half-baked idea is more useful than nothing.",
-			"For high/critical entries, include acceptance_criteria — concrete conditions a future resolver must satisfy before calling this resolved. Without it, 'resolved' is just trust; with it, the resolver has a falsifiable bar. Phrase as 1. ... 2. ... 3. ... so each can be checked off independently.",
-			"occurred_in is auto-filled from the active auto.lock; only override if you're reporting from outside the current unit.",
+			"Use report_issue for agent-tooling observations only — not for bugs in the user's project or your own task work.",
+			"Acceptable kinds: prompt-quality-issue, improvement-idea, agent-friction, design-thought, missing-feature, brittle-predicate, git-empty-pathspec.",
+			"Severity: low = cosmetic, medium = recurring friction, high = blocked a task unit, critical = needs immediate fix.",
+			"Include concrete evidence (log excerpt, command, file path, error message) and a suggested_fix if you have one.",
+			"For high/critical entries, include acceptance_criteria so the resolver has a falsifiable bar to meet.",
+			"Over-reporting is preferred — dedup and cleanup happen separately.",
 		],
 		parameters: Type.Object({
 			kind: Type.String({
@ -816,15 +805,14 @@ export function registerDbTools(pi) {
 		name: "resolve_issue",
 		label: "Resolve Self Feedback",
 		description:
-			"Mark a previously reported agent-system issue as resolved and record the fix evidence (commit SHA, test path, or narrative). " +
+			"Mark a previously reported agent-tooling issue as resolved and record the fix evidence (commit SHA, test path, or narrative). " +
 			"Call this only after the fix is implemented and verified — not speculatively.",
 		promptSnippet:
-			"Resolve a repaired SF self-feedback entry with commit/test evidence",
+			"Resolve a filed agent-tooling issue with commit or test evidence",
 		promptGuidelines: [
-			"Use resolve_issue during self-feedback inline-fix repair turns after the fix is implemented and verified.",
-			"Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`; this tool updates the durable self-feedback store and regenerates the markdown projection.",
-			"If the entry has acceptance criteria, pass criteria_met with the criteria that were satisfied.",
-			"Pass commit_sha when a commit exists. If an entry was already fixed, cite the existing commit or include summary_narrative and test_path.",
+			"Call resolve_issue after implementing and verifying the fix, not before.",
+			"Pass commit_sha when a commit exists; use summary_narrative and test_path when a commit is not the right artifact.",
+			"If the entry had acceptance criteria, pass criteria_met with the criteria you satisfied.",
 		],
 		parameters: Type.Object({
 			id: Type.String({
@ -923,19 +911,17 @@ export function registerDbTools(pi) {
 		name: "checkpoint",
 		label: "Autonomous Checkpoint",
 		description:
-			"Save a structured progress checkpoint for the current autonomous task — capturing what was done, what's blocked, and what remains. " +
-			"Call this before ending every turn in autonomous mode to make progress visible and recoverable.",
+			"Save a structured progress snapshot for the current task iteration — what was done, what's blocked, and what remains — so progress is visible and recoverable. " +
+			"Call this before ending every autonomous turn.",
 		promptSnippet:
-			"Checkpoint autonomous solver progress with PDD fields and semantic outcome",
+			"Save a progress checkpoint before ending an autonomous turn",
 		promptGuidelines: [
-			"Call checkpoint before ending an autonomous unit turn.",
-			"Do not write SUMMARY.md, LOOP.md, task files, or chat prose as a substitute for this tool call.",
-			"The checkpoint is recorded only when this actual tool returns success.",
-			"Use outcome=complete only when the normal unit completion artifact/tool is also complete.",
-			"Use outcome=continue when you made real progress but the unit needs another autonomous iteration.",
-			"Use outcome=blocked for missing facts, credentials, broken environment, or impossible next steps.",
-			"Use outcome=decide for material product or architecture choices that autonomous mode must not decide silently.",
-			"Fill all eight PDD fields: purpose, consumer, contract, failureBoundary, evidence, nonGoals, invariants, assumptions.",
+			"Call checkpoint before ending any autonomous task turn.",
+			"outcome=complete only when the unit's normal completion tool also succeeded.",
+			"outcome=continue when real progress was made but more iterations are needed.",
+			"outcome=blocked for missing facts, broken environment, or impossible next steps.",
+			"outcome=decide for material product or architecture choices that must not be made silently.",
+			"Fill all eight structured fields: purpose, consumer, contract, failureBoundary, evidence, nonGoals, invariants, assumptions.",
 		],
 		parameters: Type.Object({
 			unitType: Type.String({