sf snapshot: pre-dispatch, uncommitted changes after 43m inactivity
parent 7b6c9dd099
commit e2147c0694
40 changed files with 3485 additions and 298 deletions
@ -24,6 +24,7 @@
clippy
git
nodejs_24
protobuf
rust-analyzer
rustc
rustfmt
@ -39,6 +40,7 @@
echo " bun : $(command -v bun)"
echo " cargo: $(command -v cargo)"
echo " node : $(command -v node)"
echo " protoc: $(command -v protoc)"
echo " rustc: $(command -v rustc)"
echo ""
echo "Build native addon:"
@ -35,6 +35,26 @@ export interface BuildSystemPromptOptions {
contextFiles?: Array<{ path: string; content: string }>;
/** Pre-loaded skills. */
skills?: Skill[];
/**
* Optional predicate applied to the `skills` list before rendering the
* <available_skills> catalog. Returning `false` omits a skill from the
* prompt (the skill remains loaded and invocable by name — only the
* catalog listing is suppressed).
*
* Intended for consumers that can narrow the relevant skill surface
* (e.g. per-unit-type manifests) to reduce cached system-prompt bloat.
* When omitted, all non-`disableModelInvocation` skills render — i.e.
* behavior is unchanged from before this option existed.
*
* Contract: the predicate must be **pure and synchronous**. It may be
* invoked on every system-prompt rebuild (tool-set changes and
* runtime resource-loader extensions both trigger one), so any state
* the closure captures should be stable across the rebuild window.
* If the predicate throws, `buildSystemPrompt` logs a warning and
* falls back to the unfiltered skill list — callers never see the
* exception and the session stays consistent.
*/
skillFilter?: (skill: Skill) => boolean;
}
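For orientation, this is how a consumer with a narrower skill surface might use the new option — a minimal sketch, with a hypothetical `relevantSkillNames` allow-list standing in for a per-unit-type manifest (imports follow the test file added in this commit; only `buildSystemPrompt`, `Skill`, and `skillFilter` come from this change):

```ts
import { buildSystemPrompt } from "../core/system-prompt.js";
import type { Skill } from "../core/skills.js";

// Hypothetical: the skills this unit type actually needs, e.g. from a manifest.
const relevantSkillNames = new Set(["alpha", "gamma"]);

function buildPromptForUnit(skills: Skill[]): string {
  return buildSystemPrompt({
    skills,
    // Pure, synchronous predicate — cheap to re-run on every prompt rebuild.
    skillFilter: (skill) => relevantSkillNames.has(skill.name),
  });
}
```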

/** Build the system prompt with tools, guidelines, and context */
@ -48,6 +68,7 @@ export function buildSystemPrompt(options: BuildSystemPromptOptions = {}): strin
cwd,
contextFiles: providedContextFiles,
skills: providedSkills,
skillFilter,
} = options;
const resolvedCwd = toPosixPath(cwd ?? process.cwd());
@ -66,7 +87,20 @@ export function buildSystemPrompt(options: BuildSystemPromptOptions = {}): strin
const appendSection = appendSystemPrompt ? `\n\n${appendSystemPrompt}` : "";

const contextFiles = providedContextFiles ?? [];
const skills = providedSkills ?? [];
const skillsBase = providedSkills ?? [];
let skills = skillsBase;
if (skillFilter) {
try {
skills = skillsBase.filter(skillFilter);
} catch (error) {
// A consumer's predicate threw. Fall back to the unfiltered list so
// the session stays consistent — callers (e.g. AgentSession.setTools)
// must not be left with updated tools but a stale system prompt.
const message = error instanceof Error ? error.message : String(error);
console.warn(`buildSystemPrompt: skillFilter threw; falling back to unfiltered skills. Error: ${message}`);
skills = skillsBase;
}
}

if (customPrompt) {
let prompt = customPrompt;
@ -65,6 +65,67 @@ function parseMcpToolName(name: string): { server: string; tool: string } | null
return { server: rest.slice(0, delim), tool: rest.slice(delim + 2) };
}

/**
* Prettify a raw tool name for display. Prefers the registered `label`
* ("Complete Slice") when available; otherwise strips a leading `sf_`
* prefix and converts snake_case to Title Case.
*/
function prettifyToolName(name: string, label?: string): string {
if (label && label.trim().length > 0 && label !== name) return label;
const stripped = name.replace(/^sf_/, "");
if (stripped.length === 0) return name;
return stripped
.split("_")
.map((word) => (word.length === 0 ? word : word[0].toUpperCase() + word.slice(1)))
.join(" ");
}
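For concreteness, the outputs this yields (derived directly from the implementation above):

```ts
prettifyToolName("sf_complete_slice", "Complete Slice"); // "Complete Slice" — label wins when present
prettifyToolName("sf_complete_slice");                    // "Complete Slice" — prefix stripped, Title Cased
prettifyToolName("read_file");                            // "Read File"
prettifyToolName("sf_");                                  // "sf_" — nothing left after the prefix, raw name returned
```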
|
||||
|
||||
type ToolFrameTone = "pending" | "success" | "error";
|
||||
|
||||
function trimOuterBlankLines(lines: string[]): string[] {
|
||||
let start = 0;
|
||||
let end = lines.length;
|
||||
while (start < end && lines[start].trim().length === 0) start++;
|
||||
while (end > start && lines[end - 1].trim().length === 0) end--;
|
||||
return lines.slice(start, end);
|
||||
}
|
||||
|
||||
function renderToolFrame(
|
||||
contentLines: string[],
|
||||
width: number,
|
||||
opts: {
|
||||
label: string;
|
||||
status: string;
|
||||
tone: ToolFrameTone;
|
||||
},
|
||||
): string[] {
|
||||
const outerWidth = Math.max(20, width);
|
||||
const contentWidth = Math.max(1, outerWidth - 2); // "│ " + content
|
||||
|
||||
const borderColor = opts.tone === "error" ? "error" : "toolTitle";
|
||||
const topColor = opts.tone === "error" ? "error" : "toolTitle";
|
||||
const labelColor = opts.tone === "error" ? "error" : "toolTitle";
|
||||
const statusColor = opts.tone === "error" ? "error" : opts.tone === "pending" ? "warning" : "success";
|
||||
const border = (s: string) => theme.fg(borderColor, s);
|
||||
|
||||
const leftStyled = theme.fg(labelColor, theme.bold(`• ${opts.label}`));
|
||||
const rightStyled = theme.fg(statusColor, opts.status);
|
||||
const gap = Math.max(1, outerWidth - visibleWidth(leftStyled) - visibleWidth(rightStyled));
|
||||
const headerRow = `${leftStyled}${" ".repeat(gap)}${rightStyled}`;
|
||||
const headerPad = Math.max(0, outerWidth - visibleWidth(headerRow));
|
||||
|
||||
const sourceLines = trimOuterBlankLines(contentLines);
|
||||
const bodyLines = (sourceLines.length > 0 ? sourceLines : [""]).map((line) => {
|
||||
const clipped = truncateToWidth(line, contentWidth, "");
|
||||
return border("│ ") + clipped;
|
||||
});
|
||||
|
||||
return [
|
||||
theme.fg(topColor, "─".repeat(outerWidth)),
|
||||
headerRow + " ".repeat(headerPad),
|
||||
...bodyLines,
|
||||
];
|
||||
}
|
||||
const COMPACT_ARG_VALUE_LIMIT = 60;
|
||||
const GENERIC_OUTPUT_PREVIEW_LINES = 10;
|
||||
const GENERIC_ARGS_JSON_PREVIEW_LINES = 10;
|
||||
|
|
@ -83,15 +144,19 @@ function formatCompactArgs(args: unknown, expanded: boolean): string {
|
|||
|
||||
const allPrimitive = entries.every(([, value]) => {
|
||||
const t = typeof value;
|
||||
if (t === "number" || t === "boolean") return true;
|
||||
if (t === "string") return (value as string).length <= COMPACT_ARG_VALUE_LIMIT;
|
||||
return value == null;
|
||||
return t === "number" || t === "boolean" || t === "string" || value == null;
|
||||
});
|
||||
|
||||
if (allPrimitive) {
|
||||
return entries
|
||||
.map(([key, value]) => {
|
||||
if (typeof value === "string") return `${key}=${JSON.stringify(value)}`;
|
||||
if (typeof value === "string") {
|
||||
const truncated =
|
||||
!expanded && value.length > COMPACT_ARG_VALUE_LIMIT
|
||||
? `${value.slice(0, COMPACT_ARG_VALUE_LIMIT - 1)}…`
|
||||
: value;
|
||||
return `${key}=${JSON.stringify(truncated)}`;
|
||||
}
|
||||
if (value == null) return `${key}=null`;
|
||||
return `${key}=${String(value)}`;
|
||||
})
|
||||
|
|
@ -452,7 +517,22 @@ export class ToolExecutionComponent extends Container {
|
|||
if (this.hideComponent) {
|
||||
return [];
|
||||
}
|
||||
return super.render(width);
|
||||
const frameWidth = Math.max(20, width);
|
||||
const contentWidth = Math.max(1, frameWidth - 4);
|
||||
const lines = super.render(contentWidth);
|
||||
const frameTone: ToolFrameTone =
|
||||
this.result?.isError ? "error" : this.isPartial || !this.result ? "pending" : "success";
|
||||
const frameStatus = this.isPartial || !this.result ? "Running" : this.result.isError ? "Error" : "Done";
|
||||
const parsed = parseMcpToolName(this.toolName);
|
||||
const frameLabel = parsed
|
||||
? `Tool ${parsed.server}·${parsed.tool}`
|
||||
: `Tool ${prettifyToolName(this.toolName, this.toolDefinition?.label) || "unknown"}`;
|
||||
const framed = renderToolFrame(lines, frameWidth, {
|
||||
label: frameLabel,
|
||||
status: frameStatus,
|
||||
tone: frameTone,
|
||||
});
|
||||
return framed.length > 0 ? ["", ...framed] : framed;
|
||||
}
|
||||
|
||||
private updateDisplay(): void {
|
||||
|
|
@ -1050,7 +1130,9 @@ export class ToolExecutionComponent extends Container {
|
|||
// cleanly. SF-registered MCP tools have already had their prefix
|
||||
// stripped upstream in partial-builder.ts and won't reach this branch.
|
||||
const parsed = parseMcpToolName(this.toolName);
|
||||
const displayName = parsed ? parsed.tool : this.toolName;
|
||||
const displayName = parsed
|
||||
? parsed.tool
|
||||
: prettifyToolName(this.toolName, this.toolDefinition?.label);
|
||||
const serverPrefix = parsed ? theme.fg("muted", `${parsed.server}\u00b7`) : "";
|
||||
text = serverPrefix + theme.fg("toolTitle", theme.bold(displayName));
@ -0,0 +1,157 @@
|
|||
// @gsd/pi-coding-agent + system-prompt-skill-filter.test — coverage for the
|
||||
// optional `skillFilter` option added to buildSystemPrompt (RFC #4779). The
|
||||
// filter lets consumers narrow the <available_skills> catalog rendered into
|
||||
// the cached system prompt without touching skill loading or invocation.
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import { buildSystemPrompt } from "../core/system-prompt.js";
|
||||
import type { Skill } from "../core/skills.js";
|
||||
|
||||
function makeSkill(name: string, description = `description for ${name}`): Skill {
|
||||
return {
|
||||
name,
|
||||
description,
|
||||
filePath: `/tmp/${name}/SKILL.md`,
|
||||
baseDir: `/tmp/${name}`,
|
||||
source: "project",
|
||||
disableModelInvocation: false,
|
||||
};
|
||||
}
|
||||
|
||||
function extractAvailableSkills(prompt: string): string {
|
||||
const start = prompt.indexOf("<available_skills>");
|
||||
const end = prompt.indexOf("</available_skills>");
|
||||
if (start === -1 || end === -1) return "";
|
||||
return prompt.slice(start, end + "</available_skills>".length);
|
||||
}
|
||||
|
||||
// ─── Default branch (no customPrompt) ──────────────────────────────────────
|
||||
|
||||
test("buildSystemPrompt: skillFilter omits filtered-out skills from <available_skills>", () => {
|
||||
const skills = [makeSkill("alpha"), makeSkill("beta"), makeSkill("gamma")];
|
||||
const prompt = buildSystemPrompt({
|
||||
skills,
|
||||
selectedTools: ["read", "Skill"],
|
||||
skillFilter: skill => skill.name !== "beta",
|
||||
});
|
||||
|
||||
const section = extractAvailableSkills(prompt);
|
||||
assert.ok(section.length > 0, "catalog section should render");
|
||||
assert.match(section, /<name>alpha<\/name>/);
|
||||
assert.match(section, /<name>gamma<\/name>/);
|
||||
assert.doesNotMatch(section, /<name>beta<\/name>/);
|
||||
});
|
||||
|
||||
test("buildSystemPrompt: skillFilter omitted preserves pre-filter behavior (all skills render)", () => {
|
||||
const skills = [makeSkill("alpha"), makeSkill("beta")];
|
||||
const prompt = buildSystemPrompt({
|
||||
skills,
|
||||
selectedTools: ["read", "Skill"],
|
||||
});
|
||||
|
||||
const section = extractAvailableSkills(prompt);
|
||||
assert.match(section, /<name>alpha<\/name>/);
|
||||
assert.match(section, /<name>beta<\/name>/);
|
||||
});
|
||||
|
||||
test("buildSystemPrompt: skillFilter that rejects every skill suppresses the <available_skills> block", () => {
|
||||
const skills = [makeSkill("alpha"), makeSkill("beta")];
|
||||
const prompt = buildSystemPrompt({
|
||||
skills,
|
||||
selectedTools: ["read", "Skill"],
|
||||
skillFilter: () => false,
|
||||
});
|
||||
|
||||
// With zero visible skills, formatSkillsForPrompt returns an empty string,
|
||||
// so the opening tag should not appear anywhere.
|
||||
assert.ok(!prompt.includes("<available_skills>"));
|
||||
});
|
||||
|
||||
// ─── Custom-prompt branch ──────────────────────────────────────────────────
|
||||
|
||||
test("buildSystemPrompt (customPrompt): skillFilter applies to the catalog appended onto a custom prompt", () => {
|
||||
const skills = [makeSkill("alpha"), makeSkill("beta"), makeSkill("gamma")];
|
||||
const prompt = buildSystemPrompt({
|
||||
customPrompt: "CUSTOM BASE",
|
||||
skills,
|
||||
selectedTools: ["read", "Skill"],
|
||||
skillFilter: skill => skill.name === "alpha",
|
||||
});
|
||||
|
||||
const section = extractAvailableSkills(prompt);
|
||||
assert.match(section, /<name>alpha<\/name>/);
|
||||
assert.doesNotMatch(section, /<name>beta<\/name>/);
|
||||
assert.doesNotMatch(section, /<name>gamma<\/name>/);
|
||||
});
|
||||
|
||||
// ─── Interaction with disableModelInvocation ──────────────────────────────
|
||||
|
||||
test("buildSystemPrompt: skillFilter composes with disableModelInvocation (both must pass)", () => {
|
||||
// A skill already hidden from the catalog by disableModelInvocation must
|
||||
// remain hidden even if skillFilter would otherwise admit it. The filter
|
||||
// narrows, it does not override the existing invisibility contract.
|
||||
const skills: Skill[] = [
|
||||
{ ...makeSkill("visible"), disableModelInvocation: false },
|
||||
{ ...makeSkill("hidden"), disableModelInvocation: true },
|
||||
];
|
||||
const prompt = buildSystemPrompt({
|
||||
skills,
|
||||
selectedTools: ["read", "Skill"],
|
||||
skillFilter: () => true,
|
||||
});
|
||||
|
||||
const section = extractAvailableSkills(prompt);
|
||||
assert.match(section, /<name>visible<\/name>/);
|
||||
assert.doesNotMatch(section, /<name>hidden<\/name>/);
|
||||
});
|
||||
|
||||
// ─── Pass-through of non-filtered fields ──────────────────────────────────
|
||||
|
||||
test("buildSystemPrompt: skillFilter does not affect context files or cwd rendering", () => {
|
||||
const skills = [makeSkill("alpha")];
|
||||
const prompt = buildSystemPrompt({
|
||||
skills,
|
||||
cwd: "/tmp/example",
|
||||
contextFiles: [{ path: "CLAUDE.md", content: "project instructions" }],
|
||||
selectedTools: ["read", "Skill"],
|
||||
skillFilter: () => false,
|
||||
});
|
||||
|
||||
assert.ok(prompt.includes("/tmp/example"), "cwd should still render");
|
||||
assert.ok(prompt.includes("project instructions"), "context files should still render");
|
||||
assert.ok(!prompt.includes("<available_skills>"), "no skill catalog when filter rejects all");
|
||||
});
|
||||
|
||||
// ─── Exception safety ─────────────────────────────────────────────────────
|
||||
|
||||
test("buildSystemPrompt: skillFilter that throws falls back to unfiltered list and does not propagate", (t) => {
|
||||
// A buggy consumer predicate must not bubble out of buildSystemPrompt.
|
||||
// If it did, _rebuildSystemPrompt could unwind mid-setTools() and leave
|
||||
// the session with updated tools but a stale system prompt.
|
||||
const skills = [makeSkill("alpha"), makeSkill("beta")];
|
||||
|
||||
// Suppress the console.warn the fallback emits so test output stays clean.
|
||||
const originalWarn = console.warn;
|
||||
const warnings: string[] = [];
|
||||
console.warn = (...args: unknown[]) => { warnings.push(args.join(" ")); };
|
||||
t.after(() => { console.warn = originalWarn; });
|
||||
|
||||
let prompt = "";
|
||||
assert.doesNotThrow(() => {
|
||||
prompt = buildSystemPrompt({
|
||||
skills,
|
||||
selectedTools: ["read", "Skill"],
|
||||
skillFilter: () => { throw new Error("consumer bug"); },
|
||||
});
|
||||
});
|
||||
|
||||
const section = extractAvailableSkills(prompt);
|
||||
assert.match(section, /<name>alpha<\/name>/, "alpha should render (fallback to unfiltered)");
|
||||
assert.match(section, /<name>beta<\/name>/, "beta should render (fallback to unfiltered)");
|
||||
assert.ok(
|
||||
warnings.some(w => w.includes("skillFilter threw") && w.includes("consumer bug")),
|
||||
"fallback should emit an identifying warning",
|
||||
);
|
||||
});
|
||||
|
|
@ -167,12 +167,22 @@ async function ensureRtkInstalled() {
throw new Error('downloaded RTK binary failed validation')
}
} catch (error) {
logWarn(`RTK install skipped: ${error instanceof Error ? error.message : String(error)}`)
logWarn(`RTK install skipped: ${describeFetchError(error)}`)
} finally {
rmSync(tempRoot, { recursive: true, force: true })
}
}

function describeFetchError(err) {
const base = err?.message || String(err)
const cause = err?.cause
if (!cause) return base
const code = cause.code || cause.errno
const causeMsg = cause.message || ''
const detail = code ? `${code}${causeMsg && causeMsg !== code ? ` — ${causeMsg}` : ''}` : causeMsg
return detail ? `${base} (${detail})` : base
}
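As a rough illustration of the messages this produces — a sketch assuming the usual Node `fetch` failure shape, where the exact `cause` fields vary by Node version and failure mode:

```ts
// Hypothetical error shaped like a DNS failure from Node's fetch.
const err = Object.assign(new Error('fetch failed'), {
  cause: { code: 'ENOTFOUND', message: 'getaddrinfo ENOTFOUND example.invalid' },
})
describeFetchError(err)
// → "fetch failed (ENOTFOUND — getaddrinfo ENOTFOUND example.invalid)"
// An error with no cause falls back to plain err.message.
```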

if (!PLAYWRIGHT_SKIP) {
await run('npx playwright install chromium')
}
@ -52,7 +52,10 @@ Route based on what you need to do:
Read `workflows/build-from-spec.md` — write spec, init directory, launch, monitor, verify.

**Check on a running or completed build:**
Read `workflows/monitor-and-poll.md` — query state, interpret phases, handle blockers.
Read `workflows/monitor-and-poll.md` — query state, interpret phases, handle blockers, recover from crashes.

**Intercept SF questions interactively (supervised mode):**
Read `workflows/monitor-and-poll.md#supervised-mode` — use when you want to handle SF's UI requests yourself instead of pre-supplying answers.

**Execute with fine-grained control:**
Read `workflows/step-by-step.md` — run one unit at a time with decision points.
@ -63,8 +63,20 @@ EXIT=$?
|
|||
|
||||
**With budget limit:**
|
||||
```bash
|
||||
# Use step-by-step mode with budget checks instead of auto
|
||||
# See workflows/step-by-step.md
|
||||
# Create the milestone, then run step-by-step with a budget cap
|
||||
MAX_BUDGET=15.00
|
||||
sf headless --output-format json --context spec.md new-milestone 2>/dev/null
|
||||
while true; do
|
||||
RESULT=$(sf headless --output-format json next 2>/dev/null)
|
||||
EXIT=$?
|
||||
[ $EXIT -ne 0 ] && break
|
||||
STATE=$(sf headless query)
|
||||
PHASE=$(echo "$STATE" | jq -r '.state.phase')
|
||||
COST=$(echo "$STATE" | jq -r '.cost.total')
|
||||
[ "$PHASE" = "complete" ] && { echo "Done (\$$COST)"; break; }
|
||||
OVER=$(echo "$COST > $MAX_BUDGET" | bc -l)
|
||||
[ "$OVER" = "1" ] && { echo "Budget cap hit at \$$COST"; sf headless stop; break; }
|
||||
done
|
||||
```
|
||||
|
||||
**For CI or ecosystem runs (no user config):**
@ -163,6 +163,73 @@ sf headless --output-format json auto 2>/dev/null
|
|||
sf headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null
|
||||
```
|
||||
|
||||
## Supervised Mode
|
||||
|
||||
Use `--supervised` when you want SF to ask you questions interactively rather than auto-answering or blocking. SF writes UI requests to stdout as JSONL; you respond via stdin.
|
||||
|
||||
**When to use it:** You're the orchestrator running in a loop and want to intercept SF's questions yourself instead of pre-supplying an answers file.
|
||||
|
||||
```bash
|
||||
# Launch in supervised mode — SF will write extension_ui_request events to stdout
|
||||
# and wait for your response on stdin before continuing
|
||||
sf headless --supervised --json auto 2>/dev/null | while IFS= read -r line; do
|
||||
TYPE=$(echo "$line" | jq -r '.type')
|
||||
|
||||
if [ "$TYPE" = "extension_ui_request" ]; then
|
||||
# SF is asking a question — inspect it and respond
|
||||
TITLE=$(echo "$line" | jq -r '.title // .message // "?"')
|
||||
OPTIONS=$(echo "$line" | jq -r '.options[]?.label // empty' | head -5)
|
||||
echo "SF asks: $TITLE" >&2
|
||||
echo "Options: $OPTIONS" >&2
|
||||
|
||||
# Send your answer back on stdin (the option label or value)
|
||||
echo "first_option" # replace with your selection logic
|
||||
fi
|
||||
done
|
||||
```
|
||||
|
||||
`--response-timeout N` (default 30000ms) controls how long SF waits for your response before treating it as a timeout. If you don't respond in time, SF blocks with exit code 10.
|
||||
|
||||
**Simpler alternative:** If you just want to pre-answer known questions without interactive handling, use `--answers <file>` instead — see `references/answer-injection.md`.
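The same loop can also be driven from Node when the orchestrator is not a shell script — a minimal sketch mirroring the bash example above (event and field names follow that example; the selection logic here just echoes the first option):

```ts
import { spawn } from "node:child_process";
import { createInterface } from "node:readline";

const sf = spawn("sf", ["headless", "--supervised", "--json", "auto"], {
  stdio: ["pipe", "pipe", "inherit"],
});

const rl = createInterface({ input: sf.stdout! });
rl.on("line", (line) => {
  let event: { type?: string; title?: string; message?: string; options?: Array<{ label?: string }> };
  try { event = JSON.parse(line); } catch { return; } // ignore non-JSON output lines

  if (event.type !== "extension_ui_request") return;
  console.error(`SF asks: ${event.title ?? event.message ?? "?"}`);

  // Answer on stdin with the option label (or value), as in the shell loop above.
  sf.stdin!.write(`${event.options?.[0]?.label ?? "yes"}\n`);
});
```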
|
||||
|
||||
## Crash Recovery
|
||||
|
||||
When SF exits unexpectedly (crash, OOM, signal) or `.sf/` state looks corrupted:
|
||||
|
||||
```bash
|
||||
cd /path/to/project
|
||||
|
||||
# 1. Check if the project directory is intact
|
||||
ls .sf/ 2>/dev/null || { echo "No .sf/ — project state lost, start fresh"; exit 1; }
|
||||
|
||||
# 2. Run doctor — detects and auto-fixes common state corruption
|
||||
sf headless doctor
|
||||
|
||||
# 3. Check what state SF thinks it's in
|
||||
sf headless query | jq '{phase: .state.phase, next: .next}'
|
||||
|
||||
# 4. If query fails (state unreadable), inspect STATE.md directly
|
||||
cat .sf/STATE.md 2>/dev/null
|
||||
|
||||
# 5. Resume from current state
|
||||
sf headless --output-format json auto 2>/dev/null
|
||||
|
||||
# 6. If a specific session was interrupted, resume by session ID
|
||||
sf headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null
|
||||
```
|
||||
|
||||
**Common crash scenarios:**
|
||||
|
||||
| Symptom | Cause | Fix |
|
||||
|---------|-------|-----|
|
||||
| `query` returns empty / parse error | `.sf/STATE.md` corrupted | Run `sf headless doctor` |
|
||||
| Phase stuck at `advancing` | Slice summary write interrupted | Run `sf headless next` to retry |
|
||||
| Phase stuck at `completing-milestone` | Milestone archive write interrupted | Run `sf headless dispatch complete` |
|
||||
| Zombie `.sf/` lock file | Previous process killed mid-write | Run `sf headless doctor` |
|
||||
| `exit 1` with no JSON output | SF itself crashed (OOM, signal) | Check system logs; resume with `--resume` |
|
||||
|
||||
If `doctor` can't recover the state, the safest path is to read `.sf/milestones/*/ROADMAP.md` to see what completed, then start a new milestone for remaining work.
|
||||
|
||||
## Reading Build Artifacts
|
||||
|
||||
After completion, inspect what SF produced:
|
@ -47,16 +47,18 @@ while true; do
|
|||
;;
|
||||
esac
|
||||
|
||||
# Check if milestone complete
|
||||
CURRENT_PHASE=$(sf headless query | jq -r '.state.phase')
|
||||
# One query — extract phase, cost, and progress together
|
||||
STATE=$(sf headless query)
|
||||
CURRENT_PHASE=$(echo "$STATE" | jq -r '.state.phase')
|
||||
TOTAL_COST=$(echo "$STATE" | jq -r '.cost.total')
|
||||
PROGRESS=$(echo "$STATE" | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"')
|
||||
|
||||
if [ "$CURRENT_PHASE" = "complete" ]; then
|
||||
TOTAL_COST=$(sf headless query | jq -r '.cost.total')
|
||||
echo "Milestone complete. Total cost: \$$TOTAL_COST"
|
||||
break
|
||||
fi
|
||||
|
||||
# Budget check
|
||||
TOTAL_COST=$(sf headless query | jq -r '.cost.total')
|
||||
OVER=$(echo "$TOTAL_COST > $MAX_BUDGET" | bc -l)
|
||||
if [ "$OVER" = "1" ]; then
|
||||
echo "Budget limit (\$$MAX_BUDGET) exceeded at \$$TOTAL_COST"
|
||||
|
|
@ -64,8 +66,6 @@ while true; do
|
|||
break
|
||||
fi
|
||||
|
||||
# Progress report
|
||||
PROGRESS=$(sf headless query | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"')
|
||||
echo "Step done ($STATUS). Phase: $CURRENT_PHASE, Progress: $PROGRESS, Cost: \$$TOTAL_COST"
|
||||
done
|
||||
```
@ -105,8 +105,9 @@ while true; do
|
|||
|
||||
[ $EXIT -ne 0 ] && break
|
||||
|
||||
PHASE=$(sf headless query | jq -r '.state.phase')
|
||||
COST=$(sf headless query | jq -r '.cost.total')
|
||||
STATE=$(sf headless query)
|
||||
PHASE=$(echo "$STATE" | jq -r '.state.phase')
|
||||
COST=$(echo "$STATE" | jq -r '.cost.total')
|
||||
|
||||
echo "Step $STEP complete. Phase: $PHASE, Cost: \$$COST"
|
@ -358,6 +358,29 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
|
|||
return { exitCode: result.exitCode, interrupted: false }
|
||||
}
|
||||
|
||||
// Doctor: read-only health check, no RPC child needed (#4904 live-regression).
|
||||
// The interactive `/sf doctor` command lives in the SF extension; this CLI
|
||||
// path lets non-interactive callers (CI, recovery scripts, the live-regression
|
||||
// suite) get the same diagnostic without a TTY.
|
||||
if (options.command === 'doctor') {
|
||||
const wantsJson = options.json || options.commandArgs.includes('--json')
|
||||
const { runSFDoctor, formatDoctorReport, formatDoctorReportJson } = await import('./resources/extensions/sf/doctor.js')
|
||||
let exitCode = 1
|
||||
try {
|
||||
const report = await runSFDoctor(process.cwd())
|
||||
const out = wantsJson ? formatDoctorReportJson(report) : formatDoctorReport(report)
|
||||
process.stdout.write(`${out}\n`)
|
||||
exitCode = report.ok ? 0 : 1
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err)
|
||||
process.stderr.write(`[headless] doctor failed: ${msg}\n`)
|
||||
exitCode = 1
|
||||
}
|
||||
// Bypass the auto-restart loop in runHeadless — doctor is a one-shot
|
||||
// diagnostic; exit 1 means "issues detected", not "crashed".
|
||||
process.exit(exitCode)
|
||||
}
|
||||
|
||||
// Resolve CLI path for the child process
|
||||
const cliPath = process.env.SF_BIN_PATH || process.argv[1]
|
||||
if (!cliPath) {
@ -128,17 +128,27 @@ function readManagedResourceManifest(agentDir: string): ManagedResourceManifest
|
|||
}
|
||||
|
||||
/**
|
||||
* Computes a lightweight content fingerprint of the bundled resources directory.
|
||||
* Computes a content fingerprint of a resources directory (defaults to the
|
||||
* bundled resourcesDir).
|
||||
*
|
||||
* Walks all files under resourcesDir and hashes their relative paths + sizes.
|
||||
* This catches same-version content changes (npm link dev workflow, hotfixes
|
||||
* within a release) without the cost of reading every file's contents.
|
||||
* Walks all files under `rootDir` and hashes `${relativePath}:${sha256(contents)}`
|
||||
* for each one. Using the file *contents* — not size — is what distinguishes
|
||||
* this from the earlier implementation and closes #4787: a same-size edit
|
||||
* (e.g. swapping one word for another word of the same byte length) produces
|
||||
* a different file hash, bumps the aggregate fingerprint, and therefore
|
||||
* triggers a full resync in `initResources`. The old path+size approach
|
||||
* silently cached stale prompts across upgrades.
|
||||
*
|
||||
* ~1ms for a typical resources tree (~100 files) — just stat calls, no reads.
|
||||
* Cost is ~1-2ms for a typical resources tree (~100 small .md files) —
|
||||
* still negligible at startup. Files are streamed via `readFileSync` but
|
||||
* bundled prompts are tiny so this is fine.
|
||||
*
|
||||
* Exported for unit tests and for callers that want to check a different
|
||||
* directory (e.g. pre-install verification).
|
||||
*/
|
||||
function computeResourceFingerprint(): string {
|
||||
export function computeResourceFingerprint(rootDir: string = resourcesDir): string {
|
||||
const entries: string[] = []
|
||||
collectFileEntries(resourcesDir, resourcesDir, entries)
|
||||
collectFileEntries(rootDir, rootDir, entries)
|
||||
entries.sort()
|
||||
return createHash('sha256').update(entries.join('\n')).digest('hex').slice(0, 16)
|
||||
}
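Sketched as the check a unit test might make (the temp directory and the `prompts/example.md` path are hypothetical; only `computeResourceFingerprint` comes from this file):

```ts
import { writeFileSync } from "node:fs";
import { join } from "node:path";

declare const tempResourcesDir: string; // a copy of the bundled resources tree

const before = computeResourceFingerprint(tempResourcesDir);

// Overwrite one bundled prompt with different content of the same byte
// length — the old path+size fingerprint could not see this (#4787) …
writeFileSync(join(tempResourcesDir, "prompts", "example.md"), "bar");

// … but the per-file content hash changes, so the aggregate fingerprint
// changes and initResources performs a full resync.
const after = computeResourceFingerprint(tempResourcesDir);
console.assert(after !== before);
```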
|
||||
|
|
@ -151,8 +161,16 @@ function collectFileEntries(dir: string, root: string, out: string[]): void {
|
|||
collectFileEntries(fullPath, root, out)
|
||||
} else {
|
||||
const rel = relative(root, fullPath)
|
||||
const size = statSync(fullPath).size
|
||||
out.push(`${rel}:${size}`)
|
||||
// Hash the file contents — see function doc for #4787 rationale.
|
||||
let contentHash: string
|
||||
try {
|
||||
contentHash = createHash('sha256').update(readFileSync(fullPath)).digest('hex')
|
||||
} catch {
|
||||
// Unreadable file — fall back to a stable marker so the entry still
|
||||
// contributes to the aggregate hash and future reads will re-hash.
|
||||
contentHash = 'unreadable'
|
||||
}
|
||||
out.push(`${rel}:${contentHash}`)
|
||||
}
|
||||
}
|
||||
}
|
@ -220,7 +238,7 @@ function makeTreeWritable(dirPath: string): void {
|
|||
* 3. Copies source into destination.
|
||||
* 4. Makes the result writable for the next upgrade cycle.
|
||||
*/
|
||||
function syncResourceDir(srcDir: string, destDir: string): void {
|
||||
export function syncResourceDir(srcDir: string, destDir: string): void {
|
||||
makeTreeWritable(destDir)
|
||||
if (existsSync(srcDir)) {
|
||||
pruneStaleSiblingFiles(srcDir, destDir)
@ -21,6 +21,10 @@ import type {
|
|||
import type { ExtensionUIContext } from "@singularity-forge/pi-coding-agent";
|
||||
import { EventStream } from "@singularity-forge/pi-ai";
|
||||
import { execSync } from "node:child_process";
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { createRequire } from "node:module";
|
||||
import { dirname, join } from "node:path";
|
||||
import { PartialMessageBuilder, ZERO_USAGE, mapUsage } from "./partial-builder.js";
|
||||
import { buildWorkflowMcpServers } from "../sf/workflow-mcp.js";
|
||||
import { showInterviewRound, type Question, type RoundResult } from "../shared/tui.js";
|
||||
|
|
@ -597,6 +601,440 @@ async function promptTextInputElicitation(
|
|||
return { action: "accept", content };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// canUseTool handler
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Options passed by the SDK to the canUseTool callback. */
|
||||
interface CanUseToolOptions {
|
||||
signal: AbortSignal;
|
||||
suggestions?: Array<Record<string, unknown>>;
|
||||
blockedPath?: string;
|
||||
decisionReason?: string;
|
||||
title?: string;
|
||||
displayName?: string;
|
||||
description?: string;
|
||||
toolUseID: string;
|
||||
agentID?: string;
|
||||
}
|
||||
|
||||
/** Result returned by the canUseTool callback to the SDK. */
|
||||
type CanUseToolPermissionResult =
|
||||
| { behavior: "allow"; updatedInput?: Record<string, unknown>; updatedPermissions?: Array<Record<string, unknown>>; toolUseID?: string }
|
||||
| { behavior: "deny"; message: string; interrupt?: boolean; toolUseID?: string };
|
||||
|
||||
/**
|
||||
* Known CLI tools where the subcommand verb changes the risk profile.
|
||||
* Value = number of subcommand tokens (beyond the executable) to capture
|
||||
* in the "Always Allow" permission pattern.
|
||||
*
|
||||
* `git push` and `git log` are very different → depth 1 → `Bash(git push:*)`
|
||||
* `gh pr create` and `gh pr list` differ at depth 2 → `Bash(gh pr create:*)`
|
||||
* `ping` is always safe → not listed → `Bash(ping:*)`
|
||||
*/
|
||||
const SUBCOMMAND_DEPTH: Record<string, number> = {
|
||||
git: 1,
|
||||
gh: 2,
|
||||
npm: 1,
|
||||
npx: 1,
|
||||
yarn: 1,
|
||||
pnpm: 1,
|
||||
docker: 1,
|
||||
kubectl: 1,
|
||||
aws: 2,
|
||||
az: 2,
|
||||
gcloud: 2,
|
||||
cargo: 1,
|
||||
pip: 1,
|
||||
pip3: 1,
|
||||
brew: 1,
|
||||
terraform: 1,
|
||||
helm: 1,
|
||||
dotnet: 1,
|
||||
};
|
||||
|
||||
/** Command wrappers to skip when extracting the base executable. */
|
||||
const CMD_PASSTHROUGH = new Set(["sudo", "env", "command"]);
|
||||
|
||||
/**
|
||||
* Build a smart permission pattern for Bash "Always Allow".
|
||||
*
|
||||
* Simple commands → `Bash(ping:*)` (any args are fine)
|
||||
* Subcommand-sensitive CLIs → `Bash(git push:*)` (verb is captured, args wildcarded)
|
||||
*/
|
||||
export function buildBashPermissionPattern(command: string): string {
|
||||
// When the command is a chain like "cd /foo && gh pr list", extract the
|
||||
// last segment — `cd` is just setup, the meaningful operation is what follows.
|
||||
const segments = command.split(/\s*(?:&&|\|\||;)\s*/);
|
||||
// Skip leading `cd` (directory setup) and trailing error suppressors
|
||||
// like `|| true`, `|| :`, `|| echo ...`. The meaningful command is
|
||||
// the last segment that is *neither* of those.
|
||||
const SETUP_RE = /^\s*cd\s/;
|
||||
const SUPPRESSOR_RE = /^\s*(?:true|:|echo\b)/;
|
||||
let meaningful: string | undefined;
|
||||
if (segments.length > 1) {
|
||||
// Strip suppressors, then strip cd prefixes; take the *last* remaining
|
||||
// segment — that's the meaningful command.
|
||||
const trimmed = segments.filter(s => !SUPPRESSOR_RE.test(s));
|
||||
const core = trimmed.filter(s => !SETUP_RE.test(s));
|
||||
meaningful = core.length > 0 ? core[core.length - 1] : trimmed[trimmed.length - 1];
|
||||
}
|
||||
meaningful = meaningful || segments[0] || command;
|
||||
const rawTokens = meaningful.trim().split(/\s+/);
|
||||
|
||||
// Skip sudo/env wrappers and leading VAR=val assignments
|
||||
let idx = 0;
|
||||
while (idx < rawTokens.length) {
|
||||
if (CMD_PASSTHROUGH.has(rawTokens[idx])) { idx++; continue; }
|
||||
if (/^[A-Za-z_]\w*=/.test(rawTokens[idx])) { idx++; continue; }
|
||||
break;
|
||||
}
|
||||
const tokens = rawTokens.slice(idx).filter(Boolean);
|
||||
if (tokens.length === 0) return "Bash(*)";
|
||||
|
||||
// Strip path and .exe from executable name
|
||||
const base = tokens[0].replace(/^.*[\\/]/, "").replace(/\.exe$/i, "");
|
||||
const depth = SUBCOMMAND_DEPTH[base];
|
||||
|
||||
if (depth !== undefined) {
|
||||
// Capture base + N subcommand tokens: "gh pr list" → Bash(gh pr list:*)
|
||||
const significant = [base, ...tokens.slice(1, 1 + depth)].join(" ");
|
||||
return `Bash(${significant}:*)`;
|
||||
}
|
||||
|
||||
// Simple command — any args are fine: "ping" → Bash(ping:*)
|
||||
return `Bash(${base}:*)`;
|
||||
}
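Worked out against the rules above, the derived patterns look like this:

```ts
buildBashPermissionPattern("ping -c 3 example.com");          // "Bash(ping:*)"         — not subcommand-sensitive
buildBashPermissionPattern("git push origin main");           // "Bash(git push:*)"     — depth 1
buildBashPermissionPattern("gh pr create --fill");            // "Bash(gh pr create:*)" — depth 2
buildBashPermissionPattern("cd /repo && gh pr list || true"); // "Bash(gh pr list:*)"   — cd/suppressor stripped
buildBashPermissionPattern("sudo FOO=1 cargo build");         // "Bash(cargo build:*)"  — wrappers and env skipped
```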
|
||||
|
||||
/**
|
||||
* Build the list of granularity options presented after a user chooses
|
||||
* "Always Allow" for a Bash command.
|
||||
*
|
||||
* Rather than assuming the user wants the default smart pattern, the UI
|
||||
* shows every meaningful prefix so the user explicitly picks the scope:
|
||||
*
|
||||
* "gh pr list --limit 5" → [
|
||||
* "Bash(gh:*)", // allow any gh command
|
||||
* "Bash(gh pr:*)", // allow any gh pr subcommand
|
||||
* "Bash(gh pr list:*)", // allow just this verb
|
||||
* ]
|
||||
*
|
||||
* Flags (tokens starting with `-`) terminate the subcommand chain — they
|
||||
* are call-site arguments, not stable verbs. Subcommand depth is capped
|
||||
* at 3 to keep the menu short (max 4 options).
|
||||
*
|
||||
* Returns a single-entry list when there is no meaningful subcommand to
|
||||
* choose from (e.g. `ls -la`). Callers can skip the second dialog in
|
||||
* that case.
|
||||
*/
|
||||
export function buildBashPermissionPatternOptions(command: string): string[] {
|
||||
const segments = command.split(/\s*(?:&&|\|\||;)\s*/);
|
||||
const SETUP_RE = /^\s*cd\s/;
|
||||
const SUPPRESSOR_RE = /^\s*(?:true|:|echo\b)/;
|
||||
let meaningful: string | undefined;
|
||||
if (segments.length > 1) {
|
||||
const trimmed = segments.filter(s => !SUPPRESSOR_RE.test(s));
|
||||
const core = trimmed.filter(s => !SETUP_RE.test(s));
|
||||
meaningful = core.length > 0 ? core[core.length - 1] : trimmed[trimmed.length - 1];
|
||||
}
|
||||
meaningful = meaningful || segments[0] || command;
|
||||
const rawTokens = meaningful.trim().split(/\s+/);
|
||||
|
||||
let idx = 0;
|
||||
while (idx < rawTokens.length) {
|
||||
if (CMD_PASSTHROUGH.has(rawTokens[idx])) { idx++; continue; }
|
||||
if (/^[A-Za-z_]\w*=/.test(rawTokens[idx])) { idx++; continue; }
|
||||
break;
|
||||
}
|
||||
const tokens = rawTokens.slice(idx).filter(Boolean);
|
||||
if (tokens.length === 0) return ["Bash(*)"];
|
||||
|
||||
const base = tokens[0].replace(/^.*[\\/]/, "").replace(/\.exe$/i, "");
|
||||
|
||||
// Collect up to 3 subcommand tokens, stopping at the first flag.
|
||||
const subTokens: string[] = [];
|
||||
for (let i = 1; i < tokens.length; i++) {
|
||||
const t = tokens[i];
|
||||
if (t.startsWith("-")) break;
|
||||
subTokens.push(t);
|
||||
if (subTokens.length >= 3) break;
|
||||
}
|
||||
|
||||
const patterns: string[] = [`Bash(${base}:*)`];
|
||||
for (let i = 1; i <= subTokens.length; i++) {
|
||||
patterns.push(`Bash(${[base, ...subTokens.slice(0, i)].join(" ")}:*)`);
|
||||
}
|
||||
return patterns;
|
||||
}
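For the granularity menu, the option lists this produces (worked from the rules above):

```ts
buildBashPermissionPatternOptions("gh pr list --limit 5");
// → ["Bash(gh:*)", "Bash(gh pr:*)", "Bash(gh pr list:*)"]

buildBashPermissionPatternOptions("ls -la");
// → ["Bash(ls:*)"]  — single entry, so the caller can skip the level picker
```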
|
||||
|
||||
/**
|
||||
* Read Bash allow-rule patterns from project and user settings files.
|
||||
*
|
||||
* Returns the ruleContent portion (e.g. `"gh pr list:*"`) for each
|
||||
* `Bash(...)` entry found in `permissions.allow`.
|
||||
*/
|
||||
function readBashAllowRulesFromSettings(): string[] {
|
||||
const rules: string[] = [];
|
||||
const paths = [
|
||||
join(process.cwd(), ".claude", "settings.local.json"),
|
||||
join(process.cwd(), ".claude", "settings.json"),
|
||||
];
|
||||
try {
|
||||
paths.push(join(homedir(), ".claude", "settings.json"));
|
||||
} catch {
|
||||
// homedir() can throw on some platforms
|
||||
}
|
||||
for (const settingsPath of paths) {
|
||||
try {
|
||||
if (!existsSync(settingsPath)) continue;
|
||||
const raw = JSON.parse(readFileSync(settingsPath, "utf8"));
|
||||
const allow = raw?.permissions?.allow;
|
||||
if (!Array.isArray(allow)) continue;
|
||||
for (const entry of allow) {
|
||||
if (typeof entry !== "string") continue;
|
||||
const m = /^Bash\((.+)\)$/.exec(entry);
|
||||
if (m) rules.push(m[1]);
|
||||
}
|
||||
} catch {
|
||||
// Ignore malformed settings files
|
||||
}
|
||||
}
|
||||
return rules;
|
||||
}
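To make the expected settings shape concrete — a sketch; the file contents below are illustrative, the extraction matches the regex above:

```ts
// .claude/settings.local.json (illustrative contents):
// {
//   "permissions": {
//     "allow": ["Bash(gh pr list:*)", "Bash(git push:*)", "OtherTool(irrelevant)"]
//   }
// }

readBashAllowRulesFromSettings();
// → ["gh pr list:*", "git push:*"]   (the hypothetical non-Bash entry is ignored)
```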
|
||||
|
||||
/**
|
||||
* Check if a Bash compound command matches saved allow rules after
|
||||
* extracting the meaningful segment.
|
||||
*
|
||||
* The SDK's built-in matcher refuses to match prefix rules against
|
||||
* compound commands (e.g. `cd /path && gh pr list`). Claude Code
|
||||
* routinely prepends `cd <cwd> &&` to commands, causing saved rules
|
||||
* to never match on re-invocation. This function strips safe leading
|
||||
* segments (only `cd` commands) and checks the remaining operation
|
||||
* against saved rules.
|
||||
*
|
||||
* For compound commands, returns true only when all leading segments
|
||||
* are `cd` commands and the final segment matches a saved rule.
|
||||
* For simple (single-segment) commands, checks directly against saved
|
||||
* rules — this covers the case where a rule was added mid-session and
|
||||
* the SDK's in-memory cache is stale.
|
||||
*/
|
||||
export function bashCommandMatchesSavedRules(command: string): boolean {
|
||||
const segments = command.split(/\s*(?:&&|\|\||;)\s*/).filter(Boolean);
|
||||
if (segments.length === 0) return false;
|
||||
|
||||
let meaningful: string;
|
||||
if (segments.length === 1) {
|
||||
meaningful = segments[0].trim();
|
||||
} else {
|
||||
// Strip trailing error suppressors (|| true, || :, || echo ...)
|
||||
// and leading cd segments. The first remaining segment is the
|
||||
// meaningful command. All other non-cd, non-suppressor segments
|
||||
// must be absent — otherwise we can't safely auto-approve.
|
||||
const SETUP_RE = /^cd\s/;
|
||||
const SUPPRESSOR_RE = /^\s*(?:true|:|echo\b)/;
|
||||
const trimmed = segments.filter(s => !SUPPRESSOR_RE.test(s.trim()));
|
||||
const core = trimmed.filter(s => !SETUP_RE.test(s.trim()));
|
||||
if (core.length !== 1) return false; // ambiguous — multiple real commands
|
||||
meaningful = core[0].trim();
|
||||
}
|
||||
if (!meaningful) return false;
|
||||
|
||||
const rules = readBashAllowRulesFromSettings();
|
||||
if (rules.length === 0) return false;
|
||||
|
||||
for (const rule of rules) {
|
||||
const prefixMatch = /^(.+):\*$/.exec(rule);
|
||||
if (prefixMatch) {
|
||||
const prefix = prefixMatch[1];
|
||||
if (meaningful === prefix || meaningful.startsWith(prefix + " ")) {
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Exact match
|
||||
if (meaningful === rule) return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
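A few worked cases, assuming the settings contain a single saved rule `Bash(gh pr list:*)`:

```ts
bashCommandMatchesSavedRules("gh pr list --limit 5");           // true  — simple command, prefix match
bashCommandMatchesSavedRules("cd /repo && gh pr list");         // true  — leading cd stripped
bashCommandMatchesSavedRules("cd /repo && make && gh pr list"); // false — two real commands, ambiguous
bashCommandMatchesSavedRules("gh repo delete foo");             // false — no matching rule
```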
|
||||
|
||||
/** Format the tool input into a human-readable summary for the permission prompt. */
|
||||
function formatToolInput(toolName: string, input: Record<string, unknown>): string {
|
||||
// Bash — show the command
|
||||
if (input.command && typeof input.command === "string") {
|
||||
const cmd = input.command.length > 300 ? input.command.slice(0, 300) + "…" : input.command;
|
||||
return cmd;
|
||||
}
|
||||
// File-oriented tools — show path
|
||||
if (input.file_path && typeof input.file_path === "string") {
|
||||
return `${toolName}: ${input.file_path}`;
|
||||
}
|
||||
// Generic fallback — compact JSON, truncated
|
||||
const json = JSON.stringify(input);
|
||||
if (json.length <= 200) return json;
|
||||
return json.slice(0, 200) + "…";
|
||||
}
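For example, the summaries this produces:

```ts
formatToolInput("Bash", { command: "git status" });          // "git status"
formatToolInput("Write", { file_path: "/tmp/notes.md" });    // "Write: /tmp/notes.md"
formatToolInput("WebFetch", { url: "https://example.com" }); // '{"url":"https://example.com"}' — compact JSON fallback
```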
|
||||
|
||||
/**
|
||||
* Create a canUseTool handler that routes SDK permission requests through the
|
||||
* extension UI's select dialog, or auto-approves when no UI is available.
|
||||
*
|
||||
* Presents three options:
|
||||
* - **Allow** — approve this one invocation
|
||||
* - **Always Allow** — approve and pass `suggestions` back as `updatedPermissions`
|
||||
* so the SDK remembers the choice for the rest of the session
|
||||
* - **Deny** — reject the invocation
|
||||
*
|
||||
* Follows the same pattern as {@link createClaudeCodeElicitationHandler}:
|
||||
* takes an optional UI context and returns the callback or undefined.
|
||||
*
|
||||
* When UI is unavailable (headless / auto-mode sub-agents), returns a handler
|
||||
* that always approves — replacing the old GSD_AUTO_MODE → bypassPermissions
|
||||
* workaround.
|
||||
*/
|
||||
export function createClaudeCodeCanUseToolHandler(
|
||||
ui: ExtensionUIContext | undefined,
|
||||
): ((toolName: string, input: Record<string, unknown>, options: CanUseToolOptions) => Promise<CanUseToolPermissionResult>) | undefined {
|
||||
if (!ui) return undefined;
|
||||
|
||||
return async (toolName, _input, options) => {
|
||||
// Abort early if the signal is already fired
|
||||
if (options.signal.aborted) {
|
||||
return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID };
|
||||
}
|
||||
|
||||
// For Bash compound commands (e.g. "cd /path && gh pr list"),
|
||||
// check if the meaningful operation matches a saved allow rule.
|
||||
// The SDK's built-in matcher rejects prefix rules for compound
|
||||
// commands, but cd-prefixed commands are routine and the actual
|
||||
// operation is already approved.
|
||||
if (toolName === "Bash" && typeof _input.command === "string") {
|
||||
if (bashCommandMatchesSavedRules(_input.command)) {
|
||||
return { behavior: "allow", updatedInput: _input, toolUseID: options.toolUseID };
|
||||
}
|
||||
}
|
||||
|
||||
const inputSummary = formatToolInput(toolName, _input);
|
||||
const title = options.title || `Allow Claude Code to use: ${toolName}?`;
|
||||
const body = [
|
||||
options.description,
|
||||
inputSummary,
|
||||
].filter(Boolean).join("\n");
|
||||
|
||||
// The 2nd menu (level picker) lets the user choose the exact pattern,
|
||||
// so the 1st menu just shows "Always Allow" without a command suffix.
|
||||
const alwaysAllowLabel = "Always Allow";
|
||||
|
||||
try {
|
||||
const choice = await ui.select(
|
||||
`${title}\n${body}`,
|
||||
["Allow", alwaysAllowLabel, "Deny"],
|
||||
{ signal: options.signal },
|
||||
);
|
||||
|
||||
if (options.signal.aborted) {
|
||||
return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID };
|
||||
}
|
||||
|
||||
if (choice === alwaysAllowLabel) {
|
||||
// Pass the SDK's own suggestions back as updatedPermissions so
|
||||
// it knows how to persist them (PermissionUpdate[] shape).
|
||||
// For Bash, patch the ruleContent with the user-chosen
|
||||
// granularity pattern (e.g. "gh", "gh pr", "gh pr list") so
|
||||
// the saved rule matches the scope the user actually wants.
|
||||
let perms = options.suggestions;
|
||||
let notifyLabel: string | undefined;
|
||||
if (toolName === "Bash" && typeof _input.command === "string") {
|
||||
// Present every meaningful prefix so the user picks the
|
||||
// scope explicitly rather than getting a blanket match.
|
||||
const patternOptions = buildBashPermissionPatternOptions(_input.command);
|
||||
let chosenPattern: string;
|
||||
if (patternOptions.length <= 1) {
|
||||
// No subcommand choice to make (e.g. "ls -la") — use
|
||||
// the single available pattern directly.
|
||||
chosenPattern = patternOptions[0] ?? buildBashPermissionPattern(_input.command);
|
||||
} else {
|
||||
const levelChoiceRaw = await ui.select(
|
||||
"Save permission at which level?",
|
||||
patternOptions,
|
||||
{ signal: options.signal },
|
||||
);
|
||||
if (options.signal.aborted) {
|
||||
return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID };
|
||||
}
|
||||
const levelChoice = Array.isArray(levelChoiceRaw) ? levelChoiceRaw[0] : levelChoiceRaw;
|
||||
if (!levelChoice || !patternOptions.includes(levelChoice)) {
|
||||
// User dismissed the level picker — cancel the
|
||||
// tool use. Falling back to a one-time allow
|
||||
// here would leave the spawned agent running
|
||||
// with no clear signal that the user bailed.
|
||||
return {
|
||||
behavior: "deny",
|
||||
message: "User cancelled permission selection",
|
||||
toolUseID: options.toolUseID,
|
||||
};
|
||||
}
|
||||
chosenPattern = levelChoice;
|
||||
}
|
||||
notifyLabel = chosenPattern;
|
||||
// Extract the ruleContent portion from "Bash(gh pr list:*)" → "gh pr list:*"
|
||||
const ruleContent = chosenPattern.replace(/^Bash\(/, "").replace(/\)$/, "");
|
||||
if (perms && Array.isArray(perms) && perms.length > 0) {
|
||||
// Clone suggestions and patch ruleContent on any Bash addRules entry
|
||||
perms = perms.map((s: any) => {
|
||||
if (s.type === "addRules" && Array.isArray(s.rules)) {
|
||||
return {
|
||||
...s,
|
||||
rules: s.rules.map((r: any) =>
|
||||
r.toolName === "Bash" ? { ...r, ruleContent } : r,
|
||||
),
|
||||
};
|
||||
}
|
||||
return s;
|
||||
});
|
||||
} else {
|
||||
// No suggestions from SDK — build a proper PermissionUpdate
|
||||
perms = [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}];
|
||||
}
|
||||
}
|
||||
// Notify with the resolved pattern (the level picker already previewed it)
|
||||
if (notifyLabel) {
|
||||
ui.notify(`Saved: ${notifyLabel}`, "info");
|
||||
}
|
||||
return {
|
||||
behavior: "allow",
|
||||
updatedInput: _input,
|
||||
toolUseID: options.toolUseID,
|
||||
...(perms ? { updatedPermissions: perms } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
if (choice === "Allow") {
|
||||
return {
|
||||
behavior: "allow",
|
||||
updatedInput: _input,
|
||||
toolUseID: options.toolUseID,
|
||||
};
|
||||
}
|
||||
|
||||
return { behavior: "deny", message: "User denied", toolUseID: options.toolUseID };
|
||||
} catch {
|
||||
return { behavior: "deny", message: "Aborted", toolUseID: options.toolUseID };
|
||||
}
|
||||
};
|
||||
}
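Usage mirrors the tests added later in this commit — a minimal sketch where the stub `ui` object stands in for a real `ExtensionUIContext`:

```ts
const ui = {
  select: async (_prompt: string, options: string[]) => options[0], // always picks "Allow"
  notify: (_msg: string, _level: string) => {},
} as unknown as ExtensionUIContext;

const canUseTool = createClaudeCodeCanUseToolHandler(ui);
const result = await canUseTool!("Bash", { command: "git status" }, {
  signal: new AbortController().signal,
  toolUseID: "toolu_example",
});
// result.behavior === "allow" for the one-time "Allow" choice; no
// updatedPermissions is attached unless the user picks "Always Allow".
```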
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Elicitation handler
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Create an SDK elicitation handler that routes requests through the extension UI dialogs, or undefined if no UI is available. */
|
||||
export function createClaudeCodeElicitationHandler(
|
||||
ui: ExtensionUIContext | undefined,
|
||||
): ((request: SdkElicitationRequest, options: { signal: AbortSignal }) => Promise<SdkElicitationResult>) | undefined {
|
||||
|
|
@ -976,18 +1414,26 @@ async function pumpSdkMessages(
|
|||
const prompt = buildPromptFromContext(context);
|
||||
const queryPrompt = buildSdkQueryPrompt(context, prompt);
|
||||
const permissionMode = await resolveClaudePermissionMode();
|
||||
const uiContext = (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext;
|
||||
const canUseToolHandler = createClaudeCodeCanUseToolHandler(uiContext);
|
||||
// When no UI is available (headless / auto-mode), auto-approve all
|
||||
// tool requests. This replaces the old bypassPermissions workaround.
|
||||
const canUseToolFallback = canUseToolHandler
|
||||
?? (async (_toolName: string, _input: Record<string, unknown>, opts: CanUseToolOptions): Promise<CanUseToolPermissionResult> =>
|
||||
({ behavior: "allow", toolUseID: opts.toolUseID }));
|
||||
const sdkOpts = buildSdkOptions(
|
||||
modelId,
|
||||
prompt,
|
||||
{ permissionMode },
|
||||
typeof (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext === "object"
|
||||
? {
|
||||
reasoning: options?.reasoning,
|
||||
onElicitation: createClaudeCodeElicitationHandler(
|
||||
(options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext,
|
||||
),
|
||||
}
|
||||
: { reasoning: options?.reasoning },
|
||||
{
|
||||
reasoning: options?.reasoning,
|
||||
canUseTool: canUseToolFallback,
|
||||
...(uiContext
|
||||
? {
|
||||
onElicitation: createClaudeCodeElicitationHandler(uiContext),
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
);
|
||||
|
||||
const queryResult = sdk.query({
|
||||
|
|
|
|||
|
|
@ -12,6 +12,10 @@ import {
|
|||
buildPromptFromContext,
|
||||
buildSdkQueryPrompt,
|
||||
buildSdkOptions,
|
||||
createClaudeCodeCanUseToolHandler,
|
||||
buildBashPermissionPattern,
|
||||
buildBashPermissionPatternOptions,
|
||||
bashCommandMatchesSavedRules,
|
||||
createClaudeCodeElicitationHandler,
|
||||
extractImageBlocksFromContext,
|
||||
extractToolResultsFromSdkUserMessage,
|
||||
|
|
@ -1101,3 +1105,811 @@ describe("stream-adapter — Windows Claude path lookup (#3770)", () => {
|
|||
assert.equal(parseClaudeLookupOutput(output), "C:\\Users\\Binoy\\.local\\bin\\claude.exe");
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// canUseTool handler (#4383)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("stream-adapter — canUseTool handler", () => {
|
||||
function makeOptions(overrides: Partial<{ signal: AbortSignal; suggestions: Array<Record<string, unknown>>; title: string; description: string; toolUseID: string }> = {}) {
|
||||
return {
|
||||
signal: overrides.signal ?? new AbortController().signal,
|
||||
toolUseID: overrides.toolUseID ?? "toolu_test123",
|
||||
...(overrides.title !== undefined ? { title: overrides.title } : {}),
|
||||
...(overrides.description !== undefined ? { description: overrides.description } : {}),
|
||||
...(overrides.suggestions !== undefined ? { suggestions: overrides.suggestions } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
// Point process.cwd() at an empty temp dir so the real repo's
|
||||
// .claude/settings.local.json (which may already contain rules like
|
||||
// "Bash(gh pr list:*)") does not short-circuit the permission flow.
|
||||
// Returns a cleanup function that restores cwd and removes the temp dir.
|
||||
// biome-ignore lint/suspicious/noExplicitAny: test-only monkey-patch
|
||||
function withIsolatedCwd(): () => void {
|
||||
const dir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-canusetool-")));
|
||||
const orig = process.cwd;
|
||||
process.cwd = () => dir;
|
||||
return () => {
|
||||
process.cwd = orig;
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
};
|
||||
}
|
||||
|
||||
test("returns undefined when no UI context is provided", () => {
|
||||
const handler = createClaudeCodeCanUseToolHandler(undefined);
|
||||
assert.equal(handler, undefined);
|
||||
});
|
||||
|
||||
test("shows select dialog with Allow/Always Allow/Deny and returns allow", async () => {
|
||||
let selectPrompt = "";
|
||||
let selectOptions: string[] = [];
|
||||
const ui = {
|
||||
select: async (prompt: string, options: string[]) => {
|
||||
selectPrompt = prompt;
|
||||
selectOptions = options;
|
||||
return "Allow";
|
||||
},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
assert.ok(handler);
|
||||
|
||||
const input = { command: "ls -la" };
|
||||
const result = await handler!("Bash", input, makeOptions({
|
||||
title: "Claude wants to run: ls -la",
|
||||
description: "List directory contents",
|
||||
}));
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
assert.deepEqual((result as any).updatedInput, input);
|
||||
assert.equal((result as any).toolUseID, "toolu_test123");
|
||||
// Allow (one-time) should NOT include updatedPermissions
|
||||
assert.equal((result as any).updatedPermissions, undefined);
|
||||
assert.deepEqual(selectOptions, ["Allow", "Always Allow", "Deny"]);
|
||||
// Prompt includes title and input summary
|
||||
assert.ok(selectPrompt.includes("Claude wants to run: ls -la"));
|
||||
assert.ok(selectPrompt.includes("ls -la"));
|
||||
});
|
||||
|
||||
test("returns deny when user selects Deny", async () => {
|
||||
const ui = {
|
||||
select: async () => "Deny",
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "rm -rf /" }, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "deny");
|
||||
assert.equal((result as any).message, "User denied");
|
||||
assert.equal((result as any).toolUseID, "toolu_test123");
|
||||
});
|
||||
|
||||
test("returns deny when user dismisses dialog (undefined)", async () => {
|
||||
const ui = {
|
||||
select: async () => undefined,
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "echo hi" }, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "deny");
|
||||
assert.equal((result as any).message, "User denied");
|
||||
});
|
||||
|
||||
test("Always Allow for Bash patches SDK suggestions with smart ruleContent", async () => {
|
||||
const notified: string[] = [];
|
||||
const ui = { select: async (_p: string, opts: string[]) => opts.find((o) => o.startsWith("Always Allow"))!, notify: (msg: string) => notified.push(msg) };
|
||||
const suggestions = [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "ls -la /tmp" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}];
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "ls -la /tmp" }, makeOptions({ suggestions }));
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
// Should patch ruleContent with our smart pattern, preserving SDK structure
|
||||
assert.deepEqual((result as any).updatedPermissions, [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "ls:*" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}]);
|
||||
assert.equal(notified.length, 1);
|
||||
assert.ok(notified[0].includes("Saved:") && notified[0].includes("Bash(ls:*)"));
|
||||
});
|
||||
|
||||
test("Always Allow for Bash with subcommand-sensitive CLI captures verb", async () => {
|
||||
const cleanup = withIsolatedCwd();
|
||||
try {
|
||||
const notified: string[] = [];
|
||||
// First select call: pick "Always Allow ..."; second call (level
|
||||
// picker): pick the "git push" granularity explicitly.
|
||||
let selectCall = 0;
|
||||
const ui = {
|
||||
select: async (_p: string, opts: string[]) => {
|
||||
selectCall++;
|
||||
if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!;
|
||||
return "Bash(git push:*)";
|
||||
},
|
||||
notify: (msg: string) => notified.push(msg),
|
||||
};
|
||||
const suggestions = [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "git push origin main" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}];
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "git push origin main" }, makeOptions({ suggestions }));
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
assert.deepEqual((result as any).updatedPermissions, [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "git push:*" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}]);
|
||||
assert.ok(notified[0].includes("Saved:") && notified[0].includes("Bash(git push:*)"));
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
});
|
||||
|
||||
test("Always Allow for Bash without suggestions builds proper PermissionUpdate", async () => {
|
||||
const cleanup = withIsolatedCwd();
|
||||
try {
|
||||
const notified: string[] = [];
|
||||
let selectCall = 0;
|
||||
const ui = {
|
||||
select: async (_p: string, opts: string[]) => {
|
||||
selectCall++;
|
||||
if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!;
|
||||
return "Bash(gh pr list:*)";
|
||||
},
|
||||
notify: (msg: string) => notified.push(msg),
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "gh pr list" }, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
// No SDK suggestions → builds PermissionUpdate from scratch
|
||||
assert.deepEqual((result as any).updatedPermissions, [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "gh pr list:*" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}]);
|
||||
assert.ok(notified[0].includes("Saved:") && notified[0].includes("Bash(gh pr list:*)"));
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
});
|
||||
|
||||
test("Always Allow for non-Bash tools passes SDK suggestions through", async () => {
|
||||
const notified: string[] = [];
|
||||
const ui = { select: async (_p: string, opts: string[]) => opts.find((o) => o.startsWith("Always Allow"))!, notify: (msg: string) => notified.push(msg) };
|
||||
const suggestions = [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Write" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}];
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Write", { file_path: "/tmp/test.txt" }, makeOptions({ suggestions }));
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
assert.deepEqual((result as any).updatedPermissions, suggestions);
|
||||
// Non-Bash tools don't emit a post-selection notification (only Bash runs the level picker)
|
||||
assert.equal(notified.length, 0);
|
||||
});
|
||||
|
||||
test("Always Allow for non-Bash without suggestions omits updatedPermissions", async () => {
|
||||
const notified: string[] = [];
|
||||
const ui = { select: async (_p: string, opts: string[]) => opts.find((o) => o.startsWith("Always Allow"))!, notify: (msg: string) => notified.push(msg) };
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Write", { file_path: "/tmp/test.txt" }, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
assert.equal((result as any).updatedPermissions, undefined);
|
||||
// No suggestions → no notification
|
||||
assert.equal(notified.length, 0);
|
||||
});
|
||||
|
||||
test("prompt includes command text for Bash tools", async () => {
|
||||
let selectPrompt = "";
|
||||
const ui = {
|
||||
select: async (prompt: string) => {
|
||||
selectPrompt = prompt;
|
||||
return "Allow";
|
||||
},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
await handler!("Bash", { command: "git status" }, makeOptions());
|
||||
assert.ok(selectPrompt.includes("git status"), `prompt should include command: ${selectPrompt}`);
|
||||
});
|
||||
|
||||
test("prompt includes file_path for file tools", async () => {
|
||||
let selectPrompt = "";
|
||||
const ui = {
|
||||
select: async (prompt: string) => {
|
||||
selectPrompt = prompt;
|
||||
return "Allow";
|
||||
},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
await handler!("Write", { file_path: "/tmp/test.txt", content: "hello" }, makeOptions());
|
||||
assert.ok(selectPrompt.includes("/tmp/test.txt"), `prompt should include file path: ${selectPrompt}`);
|
||||
});
|
||||
|
||||
test("uses title from options when available", async () => {
|
||||
let selectPrompt = "";
|
||||
const ui = {
|
||||
select: async (prompt: string) => {
|
||||
selectPrompt = prompt;
|
||||
return "Allow";
|
||||
},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
await handler!("WebFetch", {}, makeOptions({ title: "Claude wants to fetch: https://example.com" }));
|
||||
assert.ok(selectPrompt.includes("Claude wants to fetch: https://example.com"));
|
||||
});
|
||||
|
||||
test("falls back to default title when options.title is missing", async () => {
|
||||
let selectPrompt = "";
|
||||
const ui = {
|
||||
select: async (prompt: string) => {
|
||||
selectPrompt = prompt;
|
||||
return "Allow";
|
||||
},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
await handler!("WebFetch", { url: "https://example.com" }, makeOptions());
|
||||
assert.ok(selectPrompt.includes("Allow Claude Code to use: WebFetch?"));
|
||||
});
|
||||
|
||||
test("returns deny when signal is already aborted", async () => {
|
||||
const ui = {
|
||||
select: async () => { throw new Error("should not be called"); },
|
||||
};
|
||||
|
||||
const controller = new AbortController();
|
||||
controller.abort();
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", {}, makeOptions({ signal: controller.signal }));
|
||||
|
||||
assert.equal(result.behavior, "deny");
|
||||
assert.equal((result as any).message, "Aborted");
|
||||
});
|
||||
|
||||
test("returns deny when ui.select throws", async () => {
|
||||
const ui = {
|
||||
select: async () => { throw new Error("dialog crashed"); },
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", {}, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "deny");
|
||||
assert.equal((result as any).message, "Aborted");
|
||||
});
|
||||
|
||||
test("buildSdkOptions passes canUseTool through extraOptions", () => {
|
||||
const canUseTool = async () => ({ behavior: "allow" as const, updatedInput: {}, toolUseID: "test" });
|
||||
const opts = buildSdkOptions("claude-sonnet-4-6", "test", undefined, { canUseTool });
|
||||
assert.equal(opts.canUseTool, canUseTool);
|
||||
});
|
||||
|
||||
test("Always Allow shows level picker and user broadens to base command", async () => {
|
||||
const cleanup = withIsolatedCwd();
|
||||
try {
|
||||
const prompts: string[] = [];
|
||||
const levelOpts: string[][] = [];
|
||||
let selectCall = 0;
|
||||
const ui = {
|
||||
select: async (prompt: string, opts: string[]) => {
|
||||
prompts.push(prompt);
|
||||
selectCall++;
|
||||
if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!;
|
||||
levelOpts.push(opts);
|
||||
return "Bash(gh:*)";
|
||||
},
|
||||
notify: () => {},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "gh pr list" }, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
assert.deepEqual((result as any).updatedPermissions, [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "gh:*" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}]);
|
||||
// Second dialog offered every granularity level
|
||||
assert.deepEqual(levelOpts[0], [
|
||||
"Bash(gh:*)",
|
||||
"Bash(gh pr:*)",
|
||||
"Bash(gh pr list:*)",
|
||||
]);
|
||||
assert.ok(prompts[1].includes("Save permission at which level?"));
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
});
|
||||
|
||||
test("Always Allow narrows to mid-level pattern when user picks Bash(gh pr:*)", async () => {
|
||||
const cleanup = withIsolatedCwd();
|
||||
try {
|
||||
let selectCall = 0;
|
||||
const ui = {
|
||||
select: async (_p: string, opts: string[]) => {
|
||||
selectCall++;
|
||||
if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!;
|
||||
return "Bash(gh pr:*)";
|
||||
},
|
||||
notify: () => {},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "gh pr list --limit 5" }, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
assert.deepEqual((result as any).updatedPermissions, [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "gh pr:*" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}]);
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
});
|
||||
|
||||
test("Always Allow skips level picker when only one pattern is available", async () => {
|
||||
const cleanup = withIsolatedCwd();
|
||||
try {
|
||||
const prompts: string[] = [];
|
||||
const ui = {
|
||||
select: async (prompt: string, opts: string[]) => {
|
||||
prompts.push(prompt);
|
||||
return opts.find((o) => o.startsWith("Always Allow"))!;
|
||||
},
|
||||
notify: () => {},
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "ls -la /tmp" }, makeOptions());
|
||||
|
||||
assert.equal(result.behavior, "allow");
|
||||
// "ls" has no subcommand tokens before the flag → single-option path
|
||||
assert.equal(prompts.length, 1, "should not show a second dialog");
|
||||
assert.deepEqual((result as any).updatedPermissions, [{
|
||||
type: "addRules",
|
||||
rules: [{ toolName: "Bash", ruleContent: "ls:*" }],
|
||||
behavior: "allow",
|
||||
destination: "localSettings",
|
||||
}]);
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
});
|
||||
|
||||
test("Always Allow denies the tool when level picker is dismissed", async () => {
|
||||
const cleanup = withIsolatedCwd();
|
||||
try {
|
||||
const notified: string[] = [];
|
||||
let selectCall = 0;
|
||||
const ui = {
|
||||
select: async (_p: string, opts: string[]) => {
|
||||
selectCall++;
|
||||
if (selectCall === 1) return opts.find((o) => o.startsWith("Always Allow"))!;
|
||||
return undefined; // user dismissed level picker
|
||||
},
|
||||
notify: (msg: string) => notified.push(msg),
|
||||
};
|
||||
|
||||
const handler = createClaudeCodeCanUseToolHandler(ui as any);
|
||||
const result = await handler!("Bash", { command: "gh pr list" }, makeOptions());
|
||||
|
||||
// Dismissing the level picker cancels the tool use — a one-time allow
|
||||
// would leave the spawned agent running even though the user bailed.
|
||||
assert.equal(result.behavior, "deny");
|
||||
assert.equal((result as any).updatedPermissions, undefined);
|
||||
assert.equal(notified.length, 0, "no 'Saved:' notification when nothing was saved");
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
});
|
||||
});
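Taken together, these cases pin down a two-dialog flow for Bash. A minimal sketch of that flow follows; the option labels, prompt text, and PermissionUpdate shape here are illustrative assumptions, and only `buildBashPermissionPatternOptions` is a real helper from the module under test (import path assumed):

```ts
import { buildBashPermissionPatternOptions } from "./claude-code-permissions.js"; // path assumed

type SelectUi = {
  select: (prompt: string, options: string[]) => Promise<string | undefined>;
  notify?: (msg: string) => void;
};

async function alwaysAllowBashFlowSketch(ui: SelectUi, command: string) {
  // Dialog 1: one-time allow, persistent allow, or deny. Dismissal denies.
  const choice = await ui.select(`Allow Claude Code to run: ${command}?`, [
    "Allow",
    "Always Allow (save a permission rule)",
    "Deny",
  ]);
  if (choice === undefined || choice === "Deny") return { behavior: "deny" as const, message: "User denied" };
  if (choice === "Allow") return { behavior: "allow" as const };

  // Dialog 2 (Bash only): pick the granularity to persist. Skipped when the
  // command yields a single candidate pattern (plain `ls`, `ping`, ...).
  const levels = buildBashPermissionPatternOptions(command);
  const picked = levels.length === 1
    ? levels[0]
    : await ui.select("Save permission at which level?", levels);
  // Dismissing the level picker cancels the tool use rather than granting a one-time allow.
  if (picked === undefined) return { behavior: "deny" as const, message: "User denied" };

  const ruleContent = picked.slice("Bash(".length, -1); // "Bash(gh pr:*)" -> "gh pr:*"
  ui.notify?.(`Saved: ${picked}`);
  return {
    behavior: "allow" as const,
    updatedPermissions: [{
      type: "addRules",
      rules: [{ toolName: "Bash", ruleContent }],
      behavior: "allow",
      destination: "localSettings",
    }],
  };
}
```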
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// buildBashPermissionPattern — smart permission granularity
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("buildBashPermissionPattern", () => {
|
||||
test("simple command wildcards all args", () => {
|
||||
assert.equal(buildBashPermissionPattern("ping -n 4 localhost"), "Bash(ping:*)");
|
||||
assert.equal(buildBashPermissionPattern("echo hello world"), "Bash(echo:*)");
|
||||
assert.equal(buildBashPermissionPattern("ls -la /tmp"), "Bash(ls:*)");
|
||||
assert.equal(buildBashPermissionPattern("node server.js"), "Bash(node:*)");
|
||||
});
|
||||
|
||||
test("git captures one subcommand", () => {
|
||||
assert.equal(buildBashPermissionPattern("git push origin main"), "Bash(git push:*)");
|
||||
assert.equal(buildBashPermissionPattern("git log --oneline"), "Bash(git log:*)");
|
||||
assert.equal(buildBashPermissionPattern("git status"), "Bash(git status:*)");
|
||||
});
|
||||
|
||||
test("gh captures two subcommands", () => {
|
||||
assert.equal(buildBashPermissionPattern("gh pr list"), "Bash(gh pr list:*)");
|
||||
assert.equal(buildBashPermissionPattern("gh pr create --title foo"), "Bash(gh pr create:*)");
|
||||
assert.equal(buildBashPermissionPattern("gh issue view 123"), "Bash(gh issue view:*)");
|
||||
});
|
||||
|
||||
test("npm captures one subcommand", () => {
|
||||
assert.equal(buildBashPermissionPattern("npm install lodash"), "Bash(npm install:*)");
|
||||
assert.equal(buildBashPermissionPattern("npm publish"), "Bash(npm publish:*)");
|
||||
assert.equal(buildBashPermissionPattern("npm run test"), "Bash(npm run:*)");
|
||||
});
|
||||
|
||||
test("npx captures package name", () => {
|
||||
assert.equal(buildBashPermissionPattern("npx vitest run"), "Bash(npx vitest:*)");
|
||||
assert.equal(buildBashPermissionPattern("npx --version"), "Bash(npx --version:*)");
|
||||
});
|
||||
|
||||
test("docker captures one subcommand", () => {
|
||||
assert.equal(buildBashPermissionPattern("docker ps -a"), "Bash(docker ps:*)");
|
||||
assert.equal(buildBashPermissionPattern("docker rm container1"), "Bash(docker rm:*)");
|
||||
});
|
||||
|
||||
test("aws captures two subcommands", () => {
|
||||
assert.equal(buildBashPermissionPattern("aws s3 cp file.txt s3://bucket/"), "Bash(aws s3 cp:*)");
|
||||
assert.equal(buildBashPermissionPattern("aws ec2 describe-instances"), "Bash(aws ec2 describe-instances:*)");
|
||||
});
|
||||
|
||||
test("skips sudo wrapper", () => {
|
||||
assert.equal(buildBashPermissionPattern("sudo ping localhost"), "Bash(ping:*)");
|
||||
assert.equal(buildBashPermissionPattern("sudo git push"), "Bash(git push:*)");
|
||||
});
|
||||
|
||||
test("skips env wrapper and VAR=val assignments", () => {
|
||||
assert.equal(buildBashPermissionPattern("env NODE_ENV=prod node server.js"), "Bash(node:*)");
|
||||
assert.equal(buildBashPermissionPattern("NODE_ENV=prod node server.js"), "Bash(node:*)");
|
||||
assert.equal(buildBashPermissionPattern("FOO=bar BAZ=qux git push"), "Bash(git push:*)");
|
||||
});
|
||||
|
||||
test("strips path from executable", () => {
|
||||
assert.equal(buildBashPermissionPattern("/usr/bin/git push"), "Bash(git push:*)");
|
||||
assert.equal(buildBashPermissionPattern("C:\\Windows\\ping.exe localhost"), "Bash(ping:*)");
|
||||
});
|
||||
|
||||
test("empty or whitespace-only command", () => {
|
||||
assert.equal(buildBashPermissionPattern(""), "Bash(*)");
|
||||
assert.equal(buildBashPermissionPattern(" "), "Bash(*)");
|
||||
});
|
||||
|
||||
test("chained commands — extracts pattern from the meaningful segment", () => {
|
||||
assert.equal(buildBashPermissionPattern("cd /foo && gh pr list --limit 5"), "Bash(gh pr list:*)");
|
||||
assert.equal(buildBashPermissionPattern("cd C:/Users/djeff/repos/gsd-2 && gh pr list --limit 5"), "Bash(gh pr list:*)");
|
||||
assert.equal(buildBashPermissionPattern("cd /tmp && git push origin main"), "Bash(git push:*)");
|
||||
assert.equal(buildBashPermissionPattern("export FOO=1 && npm install lodash"), "Bash(npm install:*)");
|
||||
assert.equal(buildBashPermissionPattern("mkdir -p out; docker ps -a"), "Bash(docker ps:*)");
|
||||
assert.equal(buildBashPermissionPattern("echo start || ping localhost"), "Bash(ping:*)");
|
||||
});
|
||||
|
||||
test("skips trailing || true / || : error suppressors", () => {
|
||||
assert.equal(
|
||||
buildBashPermissionPattern("cd C:/Users/djeff/repos/gsd-2 && gh pr create --dry-run --title \"test\" --body \"test\" 2>&1 || true"),
|
||||
"Bash(gh pr create:*)",
|
||||
);
|
||||
assert.equal(buildBashPermissionPattern("gh pr list || true"), "Bash(gh pr list:*)");
|
||||
assert.equal(buildBashPermissionPattern("git push || :"), "Bash(git push:*)");
|
||||
assert.equal(buildBashPermissionPattern("cd /tmp && npm install || echo failed"), "Bash(npm install:*)");
|
||||
});
|
||||
|
||||
test("single command is unaffected by chain extraction", () => {
|
||||
assert.equal(buildBashPermissionPattern("gh pr list"), "Bash(gh pr list:*)");
|
||||
assert.equal(buildBashPermissionPattern("git push origin main"), "Bash(git push:*)");
|
||||
});
|
||||
});
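The expectations above amount to a small heuristic: pick the meaningful segment of a compound command, skip wrapper tokens, keep a CLI-specific number of subcommand verbs, and wildcard the rest. A rough sketch, not the shipped implementation (corner cases such as `npx --version` are specced slightly differently above):

```ts
// Subcommand depth per CLI; anything unlisted wildcards everything after the executable.
const SUBCOMMAND_DEPTH: Record<string, number> = { git: 1, npm: 1, npx: 1, docker: 1, gh: 2, aws: 2 };
const WRAPPERS = new Set(["sudo", "env"]);

function pickMeaningfulSegment(command: string): string {
  // Split on && ; ||, then prefer the last segment that is not a cd/export/echo
  // prefix or a trailing error suppressor (|| true, || :).
  const segments = command.split(/&&|;|\|\|/).map((s) => s.trim()).filter(Boolean);
  const meaningful = segments.filter((s) => !/^(cd\s|export\s|true$|:$|echo\s)/.test(s));
  return meaningful[meaningful.length - 1] ?? segments[segments.length - 1] ?? command;
}

function buildBashPermissionPatternSketch(command: string): string {
  const tokens = pickMeaningfulSegment(command).split(/\s+/).filter(Boolean);
  // Skip sudo/env wrappers and VAR=val assignments.
  while (tokens.length && (WRAPPERS.has(tokens[0]) || /^[A-Za-z_][A-Za-z0-9_]*=/.test(tokens[0]))) tokens.shift();
  if (tokens.length === 0) return "Bash(*)";
  // Strip any path (and .exe) from the executable.
  const exe = tokens[0].replace(/^.*[/\\]/, "").replace(/\.exe$/i, "");
  const depth = SUBCOMMAND_DEPTH[exe] ?? 0;
  const subs = tokens.slice(1, 1 + depth).filter((t) => !t.startsWith("-"));
  return `Bash(${[exe, ...subs].join(" ")}:*)`;
}

buildBashPermissionPatternSketch("cd /tmp && git push origin main"); // "Bash(git push:*)"
buildBashPermissionPatternSketch("sudo ping localhost");             // "Bash(ping:*)"
```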
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// buildBashPermissionPatternOptions — granularity level menu
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("buildBashPermissionPatternOptions", () => {
|
||||
test("offers every prefix from base to full subcommand chain", () => {
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("gh pr list"), [
|
||||
"Bash(gh:*)",
|
||||
"Bash(gh pr:*)",
|
||||
"Bash(gh pr list:*)",
|
||||
]);
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("git push origin main"), [
|
||||
"Bash(git:*)",
|
||||
"Bash(git push:*)",
|
||||
"Bash(git push origin:*)",
|
||||
"Bash(git push origin main:*)",
|
||||
]);
|
||||
});
|
||||
|
||||
test("stops at first flag — flags are args, not verbs", () => {
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("gh pr create --title foo"), [
|
||||
"Bash(gh:*)",
|
||||
"Bash(gh pr:*)",
|
||||
"Bash(gh pr create:*)",
|
||||
]);
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("git log --oneline"), [
|
||||
"Bash(git:*)",
|
||||
"Bash(git log:*)",
|
||||
]);
|
||||
});
|
||||
|
||||
test("single-option when there is no subcommand to choose from", () => {
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("ls -la /tmp"), ["Bash(ls:*)"]);
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("ping -n 4 localhost"), ["Bash(ping:*)"]);
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("node"), ["Bash(node:*)"]);
|
||||
});
|
||||
|
||||
test("extracts meaningful segment from compound commands", () => {
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("cd /foo && gh pr list"), [
|
||||
"Bash(gh:*)",
|
||||
"Bash(gh pr:*)",
|
||||
"Bash(gh pr list:*)",
|
||||
]);
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("gh pr create --dry-run || true"), [
|
||||
"Bash(gh:*)",
|
||||
"Bash(gh pr:*)",
|
||||
"Bash(gh pr create:*)",
|
||||
]);
|
||||
});
|
||||
|
||||
test("caps at three subcommand tokens to keep the menu short", () => {
|
||||
const result = buildBashPermissionPatternOptions("foo bar baz qux quux corge");
|
||||
// base + 3 sub tokens = 4 patterns max
|
||||
assert.equal(result.length, 4);
|
||||
assert.deepEqual(result, [
|
||||
"Bash(foo:*)",
|
||||
"Bash(foo bar:*)",
|
||||
"Bash(foo bar baz:*)",
|
||||
"Bash(foo bar baz qux:*)",
|
||||
]);
|
||||
});
|
||||
|
||||
test("skips sudo/env wrappers like the single-pattern variant", () => {
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("sudo git push origin"), [
|
||||
"Bash(git:*)",
|
||||
"Bash(git push:*)",
|
||||
"Bash(git push origin:*)",
|
||||
]);
|
||||
assert.deepEqual(buildBashPermissionPatternOptions("NODE_ENV=prod node server.js"), [
|
||||
"Bash(node:*)",
|
||||
"Bash(node server.js:*)",
|
||||
]);
|
||||
});
|
||||
|
||||
test("empty command returns the catch-all pattern", () => {
|
||||
assert.deepEqual(buildBashPermissionPatternOptions(""), ["Bash(*)"]);
|
||||
assert.deepEqual(buildBashPermissionPatternOptions(" "), ["Bash(*)"]);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// bashCommandMatchesSavedRules — compound command bypass for saved rules
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("bashCommandMatchesSavedRules — compound command bypass", () => {
|
||||
let tempDir: string;
|
||||
let originalCwd: string;
|
||||
|
||||
// Create a temp project directory with .claude/settings.local.json
|
||||
function setupSettings(allow: string[]): void {
|
||||
const claudeDir = join(tempDir, ".claude");
|
||||
mkdirSync(claudeDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(claudeDir, "settings.local.json"),
|
||||
JSON.stringify({ permissions: { allow } }),
|
||||
);
|
||||
}
|
||||
|
||||
// biome-ignore lint/suspicious/noExplicitAny: test-only monkey-patch
|
||||
let origCwd: any;
|
||||
|
||||
// Monkey-patch process.cwd() to point at our temp dir
|
||||
function setCwd(dir: string): void {
|
||||
origCwd = process.cwd;
|
||||
process.cwd = () => dir;
|
||||
}
|
||||
function restoreCwd(): void {
|
||||
if (origCwd) process.cwd = origCwd;
|
||||
}
|
||||
|
||||
test("matches cd-prefixed compound command against saved prefix rule", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(gh pr list:*)"]);
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd /some/path && gh pr list --limit 5"),
|
||||
true,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("matches cd-prefixed compound command with exact subcommand", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(gh pr list:*)"]);
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd C:/Users/foo/repos/bar && gh pr list"),
|
||||
true,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("rejects when leading segment is not cd", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(gh pr list:*)"]);
|
||||
setCwd(tempDir);
|
||||
// "rm -rf /tmp" is not a cd command — should not auto-approve
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("rm -rf /tmp && gh pr list"),
|
||||
false,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("rejects when meaningful segment does not match any rule", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(gh pr list:*)"]);
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd /path && gh issue create --title foo"),
|
||||
false,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("matches simple (non-compound) commands against on-disk rules", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(gh pr list:*)"]);
|
||||
setCwd(tempDir);
|
||||
// Simple commands must also be checked — the SDK's in-memory cache
|
||||
// may be stale if the rule was added mid-session via "Always Allow"
|
||||
assert.equal(bashCommandMatchesSavedRules("gh pr list --limit 5"), true);
|
||||
assert.equal(bashCommandMatchesSavedRules("gh pr list"), true);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("returns false for simple commands with no matching rule", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(gh pr list:*)"]);
|
||||
setCwd(tempDir);
|
||||
assert.equal(bashCommandMatchesSavedRules("gh issue list --limit 5"), false);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("returns false when no settings file exists", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
// No .claude/settings.local.json created
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd /path && gh pr list"),
|
||||
false,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("matches exact rule (non-prefix)", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(ping -n 4 localhost)"]);
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd /path && ping -n 4 localhost"),
|
||||
true,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("handles multiple cd segments before the meaningful command", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(npm install:*)"]);
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd /home && cd project && npm install lodash"),
|
||||
true,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("matches compound command with trailing || true suppressor", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
setupSettings(["Bash(gh pr create:*)"]);
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules('cd C:/Users/djeff/repos/gsd-2 && gh pr create --dry-run --title "test" --body "test" 2>&1 || true'),
|
||||
true,
|
||||
);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("gh pr create --dry-run || true"),
|
||||
true,
|
||||
);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd /tmp && git push || :"),
|
||||
false, // rule is for gh pr create, not git push
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("reads rules from settings.json as well as settings.local.json", () => {
|
||||
tempDir = realpathSync(mkdtempSync(join(tmpdir(), "gsd-rules-")));
|
||||
try {
|
||||
const claudeDir = join(tempDir, ".claude");
|
||||
mkdirSync(claudeDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(claudeDir, "settings.json"),
|
||||
JSON.stringify({ permissions: { allow: ["Bash(git push:*)"] } }),
|
||||
);
|
||||
setCwd(tempDir);
|
||||
assert.equal(
|
||||
bashCommandMatchesSavedRules("cd /repo && git push origin main"),
|
||||
true,
|
||||
);
|
||||
} finally {
|
||||
restoreCwd();
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
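The behaviour these cases pin down is roughly: load `permissions.allow` from `.claude/settings.json` and `.claude/settings.local.json`, drop trailing error suppressors, allow only `cd` segments ahead of the command being judged, and match that command against `Bash(prefix:*)` and exact `Bash(...)` rules. A hedged sketch, with file handling and shell parsing simplified:

```ts
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";

function loadAllowRules(cwd: string): string[] {
  const rules: string[] = [];
  for (const name of ["settings.json", "settings.local.json"]) {
    const file = join(cwd, ".claude", name);
    if (!existsSync(file)) continue;
    try {
      rules.push(...(JSON.parse(readFileSync(file, "utf-8"))?.permissions?.allow ?? []));
    } catch { /* unreadable settings file: contribute no rules */ }
  }
  return rules;
}

function matchesSavedRulesSketch(command: string, cwd = process.cwd()): boolean {
  const segments = command.split(/&&|;|\|\|/).map((s) => s.trim()).filter(Boolean);
  // Drop trailing error suppressors such as "|| true", "|| :", "|| echo failed".
  while (segments.length > 1 && /^(true|:|echo\b.*)$/.test(segments[segments.length - 1])) segments.pop();
  const meaningful = (segments[segments.length - 1] ?? "").replace(/\s+2>&1\s*$/, "").trim();
  // Only cd prefixes may be skipped; any other leading segment must be approved on its own.
  if (!segments.slice(0, -1).every((s) => /^cd\s/.test(s))) return false;
  for (const rule of loadAllowRules(cwd)) {
    const body = rule.match(/^Bash\((.+)\)$/)?.[1];
    if (!body) continue;
    if (body.endsWith(":*")) {
      const prefix = body.slice(0, -2);
      if (meaningful === prefix || meaningful.startsWith(`${prefix} `)) return true;
    } else if (meaningful === body) {
      return true;
    }
  }
  return false;
}
```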
|
||||
|
|
|
|||
|
|
@ -55,6 +55,8 @@ import {
|
|||
import { resolveModelWithFallbacksForUnit } from "./preferences-models.js";
|
||||
import { resolveUokFlags } from "./uok/flags.js";
|
||||
import { selectReactiveDispatchBatch } from "./uok/execution-graph.js";
|
||||
import { EXECUTION_ENTRY_PHASES } from "./uok/plan-v2.js";
|
||||
import { getMilestonePipelineVariant } from "./milestone-scope-classifier.js";
|
||||
|
||||
// ─── Types ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -95,6 +97,11 @@ function missingSliceStop(mid: string, phase: string): DispatchAction {
|
|||
};
|
||||
}
|
||||
|
||||
function isMilestonePlanRepairState(state: SFState): boolean {
|
||||
if (state.phase !== "planning" || state.activeSlice) return false;
|
||||
return /roadmap is incomplete|weighted vision alignment meeting/i.test(state.nextAction ?? "");
|
||||
}
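For illustration, states this guard accepts and rejects (the SFState literals below are trimmed to the fields the function actually reads):

```ts
const repairState = {
  phase: "planning",
  activeSlice: undefined,
  nextAction: "Roadmap is incomplete: rerun milestone planning",
} as unknown as SFState;
isMilestonePlanRepairState(repairState); // true, the "roadmap is incomplete" marker matches

const healthyState = {
  ...repairState,
  activeSlice: { id: "S1", title: "First slice" },
} as unknown as SFState;
isMilestonePlanRepairState(healthyState); // false, an active slice means planning already produced a slice
```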
|
||||
|
||||
/**
|
||||
* Check for milestone slices missing SUMMARY files.
|
||||
* Returns array of missing slice IDs, or empty array if all present or DB unavailable.
|
||||
|
|
@ -341,7 +348,7 @@ export const DISPATCH_RULES: DispatchRule[] = [
|
|||
// is essential for roadmap integrity. Opt-out via explicit `false`.
|
||||
const reassessEnabled = prefs?.phases?.reassess_after_slice ?? true;
|
||||
if (!reassessEnabled) return null;
|
||||
const needsReassess = await checkNeedsReassessment(basePath, mid, state);
|
||||
const needsReassess = await checkNeedsReassessment(basePath, mid, state, prefs);
|
||||
if (!needsReassess) return null;
|
||||
return {
|
||||
action: "dispatch",
|
||||
|
|
@ -368,6 +375,27 @@ export const DISPATCH_RULES: DispatchRule[] = [
|
|||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
// #4671 — Recovery for execution-entry phases with missing CONTEXT.md.
|
||||
// Once deriveStateFromDb returns an execution-entry phase the pre-planning
|
||||
// guard no longer fires. The plan-v2 gate detects missing context but can
|
||||
// only block — it cannot redispatch. Without this rule the milestone is
|
||||
// stuck until `sf doctor heal`. Fire BEFORE execution-entry phase rules.
|
||||
name: "execution-entry phase (no context) → discuss-milestone",
|
||||
match: async ({ state, mid, midTitle, basePath }) => {
|
||||
if (!EXECUTION_ENTRY_PHASES.has(state.phase)) return null;
|
||||
const contextFile = resolveMilestoneFile(basePath, mid, "CONTEXT");
|
||||
const contextContent = contextFile ? await loadFile(contextFile) : null;
|
||||
const hasContext = !!(contextContent && contextContent.trim().length > 0);
|
||||
if (hasContext) return null;
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "discuss-milestone",
|
||||
unitId: mid,
|
||||
prompt: await buildDiscussMilestonePrompt(mid, midTitle, basePath),
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "pre-planning (no context) → discuss-milestone",
|
||||
match: async ({ state, mid, midTitle, basePath }) => {
|
||||
|
|
@ -411,6 +439,18 @@ export const DISPATCH_RULES: DispatchRule[] = [
|
|||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "planning (roadmap incomplete) → plan-milestone",
|
||||
match: async ({ state, mid, midTitle, basePath }) => {
|
||||
if (!isMilestonePlanRepairState(state)) return null;
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "plan-milestone",
|
||||
unitId: mid,
|
||||
prompt: await buildPlanMilestonePrompt(mid, midTitle, basePath),
|
||||
};
|
||||
},
|
||||
},
|
||||
{
|
||||
// Keep this rule before the single-slice research rule so the multi-slice
|
||||
// path wins whenever 2+ slices are ready.
|
||||
|
|
@ -474,6 +514,8 @@ export const DISPATCH_RULES: DispatchRule[] = [
|
|||
// Phase skip: skip research when preference or profile says so
|
||||
if (prefs?.phases?.skip_research || prefs?.phases?.skip_slice_research)
|
||||
return null;
|
||||
// #4781 phase 2: trivial-scope milestones skip dedicated slice research
|
||||
if (await getMilestonePipelineVariant(mid) === "trivial") return null;
|
||||
if (!state.activeSlice) return missingSliceStop(mid, state.phase);
|
||||
const sid = state.activeSlice!.id;
|
||||
const sTitle = state.activeSlice!.title;
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@ -48,7 +48,7 @@ import { withTimeout, FINALIZE_PRE_TIMEOUT_MS, FINALIZE_POST_TIMEOUT_MS } from "
|
|||
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
|
||||
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
|
||||
import { isDbAvailable, getMilestoneSlices } from "../sf-db.js";
|
||||
import { ensurePlanV2Graph as ensurePlanningFlowGraph } from "../uok/plan-v2.js";
|
||||
import { ensurePlanV2Graph as ensurePlanningFlowGraph, isMissingFinalizedContextResult } from "../uok/plan-v2.js";
|
||||
import { resolveUokFlags } from "../uok/flags.js";
|
||||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||||
import { resetEvidence } from "../safety/evidence-collector.js";
|
||||
|
|
@ -409,18 +409,30 @@ export async function runPreDispatch(
|
|||
const compiled = ensurePlanningFlowGraph(s.basePath, state);
|
||||
if (!compiled.ok) {
|
||||
const reason = compiled.reason ?? "Planning flow compilation failed";
|
||||
await runPreDispatchGate({
|
||||
gateId: "planning-flow-gate",
|
||||
gateType: "policy",
|
||||
outcome: "manual-attention",
|
||||
failureClass: "manual-attention",
|
||||
rationale: "planning flow compile gate failed",
|
||||
findings: reason,
|
||||
milestoneId: state.activeMilestone?.id ?? undefined,
|
||||
});
|
||||
ctx.ui.notify(`Plan gate failed-closed: ${reason}`, "error");
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
return { action: "break", reason: "planning-flow-gate-failed" };
|
||||
if (isMissingFinalizedContextResult(compiled)) {
|
||||
await runPreDispatchGate({
|
||||
gateId: "planning-flow-gate",
|
||||
gateType: "policy",
|
||||
outcome: "pass",
|
||||
failureClass: "none",
|
||||
rationale: "plan v2 missing context recovery deferred to dispatch",
|
||||
findings: reason,
|
||||
milestoneId: state.activeMilestone?.id ?? undefined,
|
||||
});
|
||||
} else {
|
||||
await runPreDispatchGate({
|
||||
gateId: "planning-flow-gate",
|
||||
gateType: "policy",
|
||||
outcome: "manual-attention",
|
||||
failureClass: "manual-attention",
|
||||
rationale: "planning flow compile gate failed",
|
||||
findings: reason,
|
||||
milestoneId: state.activeMilestone?.id ?? undefined,
|
||||
});
|
||||
ctx.ui.notify(`Plan gate failed-closed: ${reason}\n\nIf this keeps happening, try: /sf doctor heal`, "error");
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
return { action: "break", reason: "planning-flow-gate-failed" };
|
||||
}
|
||||
}
|
||||
await runPreDispatchGate({
|
||||
gateId: "planning-flow-gate",
|
||||
|
|
|
|||
|
|
@ -1,7 +1,13 @@
|
|||
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
const MILESTONE_CONTEXT_RE = /M\d+(?:-[a-z0-9]{6})?-CONTEXT\.md$/;
|
||||
/**
|
||||
* Regex matching milestone CONTEXT.md file names in both legacy M001
|
||||
* and unique M001-abc123 formats. Exported so regex-hardening tests
|
||||
* can exercise the real pattern rather than a drift-prone inline
|
||||
* re-implementation.
|
||||
*/
|
||||
export const MILESTONE_CONTEXT_RE = /M\d+(?:-[a-z0-9]{6})?-CONTEXT\.md$/;
|
||||
const CONTEXT_MILESTONE_RE = /(?:^|[/\\])(M\d+(?:-[a-z0-9]{6})?)-CONTEXT\.md$/i;
|
||||
const DEPTH_VERIFICATION_MILESTONE_RE = /depth_verification[_-](M\d+(?:-[a-z0-9]{6})?)/i;
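Quick sanity checks against these patterns (legacy vs. unique milestone ids):

```ts
MILESTONE_CONTEXT_RE.test("M001-CONTEXT.md");             // true  (legacy id)
MILESTONE_CONTEXT_RE.test(".sf/M012-ab12cd-CONTEXT.md");  // true  (unique id, matched as a path suffix)
MILESTONE_CONTEXT_RE.test("M001-SUMMARY.md");             // false

// CONTEXT_MILESTONE_RE additionally captures the milestone id from a full path:
"C:\\repo\\.sf\\M012-ab12cd-CONTEXT.md".match(CONTEXT_MILESTONE_RE)?.[1]; // "M012-ab12cd"
```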
|
||||
|
||||
|
|
@ -28,8 +34,29 @@ const QUEUE_SAFE_TOOLS = new Set([
|
|||
/**
|
||||
* Bash commands that are read-only / investigative — safe during queue mode.
|
||||
* Matches the leading command in a bash invocation.
|
||||
*
|
||||
* Extension policy: add commands here when they are read-only / diagnostic.
|
||||
* Never add commands that mutate project state (write files, run builds that
|
||||
* emit artifacts, install packages, etc.).
|
||||
*
|
||||
* Current read-only additions:
|
||||
* npm run <diagnostic> — read-only diagnostic scripts: test, lint, typecheck, etc.
|
||||
* NOT: build, install, compile, generate, deploy (artifact-producing)
|
||||
* npm ls/list/info — inspect installed packages (read-only)
|
||||
* npm outdated/audit — security/update checks (read-only)
|
||||
* npx <pkg> — run a package binary without installing globally
|
||||
* tsx — TypeScript runner used for dry-run / inspection scripts
|
||||
* node --print — evaluate and print an expression, no side effects
|
||||
* python / python3 — script inspection, version checks
|
||||
* pip / pip3 show — show installed package info (read-only)
|
||||
* jq — read-only JSON query
|
||||
* yq — read-only YAML query
|
||||
* curl -s / curl --silent — fetch for inspection (no -o / no output redirect)
|
||||
* openssl version — version / certificate inspection
|
||||
* env / printenv — print environment variables
|
||||
* true / false — shell no-ops / test exit codes
|
||||
*/
|
||||
const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.sf|rtk\s)/;
|
||||
const BASH_READ_ONLY_RE = /^\s*(cat|head|tail|less|more|wc|file|stat|du|df|which|type|echo|printf|ls|find|grep|rg|awk|sed\b(?!.*-i)|sort|uniq|diff|comm|tr|cut|tee\s+-a\s+\/dev\/null|git\s+(log|show|diff|status|branch|tag|remote|rev-parse|ls-files|blame|shortlog|describe|stash\s+list|config\s+--get|cat-file)|gh\s+(issue|pr|api|repo|release)\s+(view|list|diff|status|checks)|mkdir\s+-p\s+\.sf|rtk\s|npm\s+run\s+(test|test:\w+|lint|lint:\w+|typecheck|type-check|type-check:\w+|check|verify|audit|outdated|format:check|ci|validate)\b|npm\s+(ls|list|info|view|show|outdated|audit|explain|doctor|ping|--version|-v)\b|npx\s|tsx\s|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b|-m\s+(pip\s+show|pip\s+list|site))|pip[23]?\s+(show|list|freeze|check|index\s+versions)\b|jq\s|yq\s|curl\s+(-s\b|--silent\b)(?!\s+[^|>]*\s-[oO]\b)(?!\s+[^|>]*\s--output\b)[^|>]*$|openssl\s+(version|x509|s_client)|env\b|printenv\b|true\b|false\b)/;
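Spot checks for the extended pattern, following the extension policy above (read-only and diagnostic commands pass, artifact-producing ones stay gated):

```ts
BASH_READ_ONLY_RE.test("git status");          // true,  read-only git subcommand
BASH_READ_ONLY_RE.test("npm run lint");        // true,  diagnostic npm script
BASH_READ_ONLY_RE.test("npm ls lodash");       // true,  package inspection
BASH_READ_ONLY_RE.test("npm run build");       // false, artifact-producing script
BASH_READ_ONLY_RE.test("npm install lodash");  // false, mutates node_modules
BASH_READ_ONLY_RE.test("rm -rf dist");         // false, not in the allow list
```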
|
||||
|
||||
const verifiedDepthMilestones = new Set<string>();
|
||||
let activeQueuePhase = false;
|
||||
|
|
@ -117,9 +144,21 @@ function normalizeWriteGateSnapshot(value: unknown): WriteGateSnapshot {
|
|||
};
|
||||
}
|
||||
|
||||
const EMPTY_SNAPSHOT: WriteGateSnapshot = {
|
||||
verifiedDepthMilestones: [],
|
||||
activeQueuePhase: false,
|
||||
pendingGateId: null,
|
||||
};
|
||||
|
||||
export function loadWriteGateSnapshot(basePath: string = process.cwd()): WriteGateSnapshot {
|
||||
const path = writeGateSnapshotPath(basePath);
|
||||
if (!existsSync(path)) return currentWriteGateSnapshot();
|
||||
if (!existsSync(path)) {
|
||||
// When persist mode is active and the file has been deleted, treat it as a
|
||||
// full state reset so deleting the file clears the HARD BLOCK gate.
|
||||
// In non-persist mode the file is never written, so fall back to in-memory.
|
||||
if (shouldPersistWriteGateSnapshot()) return EMPTY_SNAPSHOT;
|
||||
return currentWriteGateSnapshot();
|
||||
}
|
||||
try {
|
||||
return normalizeWriteGateSnapshot(JSON.parse(readFileSync(path, "utf-8")));
|
||||
} catch {
|
||||
|
|
|
|||
|
|
@ -225,6 +225,7 @@ function getGsdArgumentCompletions(prefix: string) {
|
|||
{ cmd: "update", desc: "Refresh the CODEBASE.md cache immediately" },
|
||||
{ cmd: "stats", desc: "Show codebase-map coverage and generation time" },
|
||||
{ cmd: "rag", desc: "Inspect optional project-rag code search backend" },
|
||||
{ cmd: "rag build", desc: "Build vendored Rust project-rag and configure MCP" },
|
||||
{ cmd: "help", desc: "Show usage and subcommands" },
|
||||
], "codebase");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import {
|
|||
readCodebaseMap,
|
||||
} from "./codebase-generator.js";
|
||||
import {
|
||||
buildProjectRagBinary,
|
||||
ensureProjectRagMcpConfig,
|
||||
formatProjectRagStatus,
|
||||
} from "./code-intelligence.js";
|
||||
|
|
@ -26,7 +27,7 @@ const USAGE =
|
|||
" generate [--max-files N] [--collapse-threshold N] — Generate or regenerate CODEBASE.md\n" +
|
||||
" update [--max-files N] [--collapse-threshold N] — Refresh the CODEBASE.md cache immediately\n" +
|
||||
" stats — Show file count, coverage, and generation time\n" +
|
||||
" rag [status|init] — Inspect or configure optional project-rag MCP backend\n" +
|
||||
" rag [status|init|build] — Inspect, build, or configure optional project-rag MCP backend\n" +
|
||||
" help — Show this help\n\n" +
|
||||
"With no subcommand, shows stats if a map exists or help if not.\n" +
|
||||
"SF also refreshes CODEBASE.md automatically before prompt injection and after completed units when tracked files change.\n\n" +
|
||||
|
|
@ -35,8 +36,8 @@ const USAGE =
|
|||
" exclude_patterns: [\"docs/\", \"fixtures/\"]\n" +
|
||||
" max_files: 1000\n" +
|
||||
" collapse_threshold: 15\n" +
|
||||
" project_rag: auto # auto | off | required\n" +
|
||||
" project_rag_auto_index: true";
|
||||
" project_rag: auto # auto | off | required\n" +
|
||||
" project_rag_auto_index: true";
|
||||
|
||||
export async function handleCodebase(
|
||||
args: string,
|
||||
|
|
@ -141,7 +142,35 @@ export async function handleCodebase(
|
|||
}
|
||||
return;
|
||||
}
|
||||
ctx.ui.notify(`Unknown /sf codebase rag action "${action}". Use status or init.`, "warning");
|
||||
if (action === "build") {
|
||||
try {
|
||||
const build = buildProjectRagBinary(basePath);
|
||||
const result = ensureProjectRagMcpConfig(basePath, {
|
||||
...process.env,
|
||||
SF_PROJECT_RAG_BIN: build.binaryPath,
|
||||
});
|
||||
ctx.ui.notify(
|
||||
[
|
||||
"Built project-rag release binary.",
|
||||
"",
|
||||
`Source: ${build.sourceDir}`,
|
||||
`Binary: ${build.binaryPath}`,
|
||||
`Cargo jobs: ${build.buildJobs} (override with SF_PROJECT_RAG_BUILD_JOBS)`,
|
||||
`MCP config: ${result.configPath} (${result.status})`,
|
||||
"",
|
||||
"Restart the MCP client session so the new server and tools are loaded.",
|
||||
].join("\n"),
|
||||
"success",
|
||||
);
|
||||
} catch (err) {
|
||||
ctx.ui.notify(
|
||||
`Could not build project-rag: ${err instanceof Error ? err.message : String(err)}`,
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
ctx.ui.notify(`Unknown /sf codebase rag action "${action}". Use status, init, or build.`, "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -250,6 +250,7 @@ const NESTED_COMPLETIONS: CompletionMap = {
|
|||
{ cmd: "stats", desc: "Show file count, description coverage, and generation time" },
|
||||
{ cmd: "rag status", desc: "Show optional project-rag MCP backend status" },
|
||||
{ cmd: "rag init", desc: "Write .mcp.json entry for project-rag when a binary is available" },
|
||||
{ cmd: "rag build", desc: "Build vendored Rust project-rag and write MCP config" },
|
||||
{ cmd: "help", desc: "Show usage and available subcommands" },
|
||||
],
|
||||
ship: [
|
||||
|
|
|
|||
|
|
@ -211,7 +211,13 @@ export function queryProject(): string | null {
|
|||
|
||||
/**
|
||||
* Filter KNOWLEDGE.md sections by keyword matching.
|
||||
* Uses H2 sections, matches keywords case-insensitively against:
|
||||
*
|
||||
* Structure-adaptive (issue #4719): files that organise entries as H3 items
|
||||
* under one or more H2 topics are filtered at H3 granularity. Files with only
|
||||
* H2 topic headers (no H3) fall back to H2-level filtering for backwards
|
||||
* compatibility.
|
||||
*
|
||||
* Matches keywords case-insensitively against:
|
||||
* 1. Section header text
|
||||
* 2. First paragraph of section content (up to first blank line or next heading)
|
||||
*
|
||||
|
|
@ -220,7 +226,7 @@ export function queryProject(): string | null {
|
|||
*
|
||||
* @param content - Full KNOWLEDGE.md content
|
||||
* @param keywords - Keywords to match (case-insensitive)
|
||||
* @returns Concatenated matching sections with H2 headers, or empty string
|
||||
* @returns Concatenated matching sections with their original heading prefix, or empty string
|
||||
*/
|
||||
export async function queryKnowledge(content: string, keywords: string[]): Promise<string> {
|
||||
if (!content || keywords.length === 0) return '';
|
||||
|
|
@ -228,11 +234,23 @@ export async function queryKnowledge(content: string, keywords: string[]): Promi
|
|||
// Lazy import to avoid circular dependency
|
||||
const { extractAllSections } = await import('./files.js');
|
||||
|
||||
const sections = extractAllSections(content, 2);
|
||||
// Prefer H3 granularity when available; fall back to H2 for H2-only files.
|
||||
// This prevents single-H2-with-many-H3 layouts from returning the entire
|
||||
// file on a keyword match against the H2 header or its first paragraph.
|
||||
const h3Sections = extractAllSections(content, 3);
|
||||
const useH3 = h3Sections.size > 0;
|
||||
const sections = useH3 ? h3Sections : extractAllSections(content, 2);
|
||||
if (sections.size === 0) return '';
|
||||
const prefix = useH3 ? '###' : '##';
|
||||
|
||||
// Normalize keywords for case-insensitive matching
|
||||
const normalizedKeywords = keywords.map(k => k.toLowerCase());
|
||||
// Trim, lowercase, drop empties, and de-dupe so callers can pass raw
|
||||
// user-provided strings without risking empty-string / whitespace matches.
|
||||
const normalizedKeywords = [...new Set(
|
||||
keywords
|
||||
.map(k => k.trim().toLowerCase())
|
||||
.filter(k => k.length > 0),
|
||||
)];
|
||||
if (normalizedKeywords.length === 0) return '';
|
||||
|
||||
const matchingSections: string[] = [];
|
||||
|
||||
|
|
@ -240,16 +258,15 @@ export async function queryKnowledge(content: string, keywords: string[]): Promi
|
|||
// Extract first paragraph: everything up to first blank line or next heading
|
||||
const firstParagraph = body.split(/\n\s*\n|\n#/)[0] || '';
|
||||
|
||||
// Check if any keyword matches header or first paragraph
|
||||
const headerLower = header.toLowerCase();
|
||||
const paragraphLower = firstParagraph.toLowerCase();
|
||||
|
||||
const matches = normalizedKeywords.some(kw =>
|
||||
headerLower.includes(kw) || paragraphLower.includes(kw)
|
||||
headerLower.includes(kw) || paragraphLower.includes(kw),
|
||||
);
|
||||
|
||||
if (matches) {
|
||||
matchingSections.push(`## ${header}\n\n${body}`);
|
||||
matchingSections.push(`${prefix} ${header}\n\n${body}`);
|
||||
}
|
||||
}
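A usage sketch of the structure-adaptive behaviour described above (output shown approximately; exact whitespace depends on extractAllSections):

```ts
// (inside an async function)
const knowledge = [
  "## Decisions",
  "",
  "### Database",
  "We use SQLite via better-sqlite3 for the local cache.",
  "",
  "### Permissions",
  "Bash permission rules live in .claude/settings.local.json.",
].join("\n");

// H3 sections exist, so filtering happens per H3 entry and the result keeps
// the original "###" prefix. Blank and duplicate keywords are dropped by the
// normalization step before matching.
const hits = await queryKnowledge(knowledge, ["sqlite", "  ", "SQLITE"]);
// hits ~ "### Database\n\nWe use SQLite via better-sqlite3 for the local cache."
```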
|
||||
|
||||
|
|
|
|||
|
|
@ -170,6 +170,9 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea
|
|||
- `project_rag`: `"auto"`, `"off"`, or `"required"` — use Brainwires/project-rag MCP search when configured. Default: `"auto"`.
|
||||
- `project_rag_server`: string — explicit MCP server name when the server cannot be detected from command or args.
|
||||
- `project_rag_auto_index`: boolean — whether agents should prefer indexing before querying a configured Project RAG backend. Default: `true`.
|
||||
- `/sf codebase rag status` reports whether the Rust backend is actually operational.
|
||||
- `/sf codebase rag init` writes a `.mcp.json` entry when a `project-rag` binary is available.
|
||||
- `/sf codebase rag build` builds vendored Brainwires/project-rag from `vendor/project-rag` (or `SF_PROJECT_RAG_SOURCE`) with `cargo build --release`, then writes the MCP config. The build defaults to `CARGO_BUILD_JOBS=2` so it does not saturate the workstation; override with `SF_PROJECT_RAG_BUILD_JOBS`.
|
||||
|
||||
- `remote_questions`: route interactive questions to Slack/Discord for headless auto-mode. Keys:
|
||||
- `channel`: `"slack"` or `"discord"` — channel type.
|
||||
|
|
|
|||
|
|
@ -628,8 +628,13 @@ export async function showHeadlessMilestoneCreation(
|
|||
// Set pending auto start (auto-mode triggers on "Milestone X ready." via checkAutoStartAfterDiscuss)
|
||||
pendingAutoStartMap.set(basePath, { ctx, pi, basePath, milestoneId: nextId, createdAt: Date.now() });
|
||||
|
||||
// Dispatch — headless milestone creation is a planning activity
|
||||
await dispatchWorkflow(pi, prompt, "sf-run", ctx, "plan-milestone");
|
||||
// Dispatch as discuss-milestone. The LLM writes PROJECT.md, REQUIREMENTS.md,
|
||||
// and CONTEXT.md, then calls sf_plan_milestone — this is semantically the
|
||||
// discuss path, just non-interactive. Using "plan-milestone" here caused
|
||||
// model/tool routing to skip discuss-flow tool scoping and
|
||||
// `checkAutoStartAfterDiscuss` guardrails that rely on the
|
||||
// "discuss-"-prefixed unitType.
|
||||
await dispatchWorkflow(pi, prompt, "sf-run", ctx, "discuss-milestone");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
302
src/resources/extensions/sf/milestone-scope-classifier.ts
Normal file
|
|
@ -0,0 +1,302 @@
|
|||
// GSD-2 — Milestone scope classifier (#4781 / ADR-003 companion).
|
||||
//
|
||||
// Pure heuristics over milestone planning fields. Produces a PipelineVariant
|
||||
// that downstream dispatch logic can use to shape the auto-mode sequence.
|
||||
// No LLM calls, no file I/O, sub-millisecond.
|
||||
//
|
||||
// Distinct from `complexity-classifier.ts`, which decides *model tier*
|
||||
// (light/standard/heavy) for an individual unit. This module decides
|
||||
// *pipeline topology* for an entire milestone at plan-milestone time.
|
||||
//
|
||||
// This file ships the classifier in isolation. Dispatch-side wiring
|
||||
// lands in follow-up PRs so the classification contract can be reviewed
|
||||
// and tested before any behavior change reaches users.
|
||||
|
||||
export type PipelineVariant = "trivial" | "standard" | "complex";
|
||||
|
||||
export interface MilestoneScopeInput {
|
||||
/** Milestone vision / elevator pitch. Free-form prose. */
|
||||
vision?: string;
|
||||
/** Success criteria, one per array entry. */
|
||||
successCriteria?: string[];
|
||||
/** Milestone title. */
|
||||
title?: string;
|
||||
/** Slice risks declared at plan-milestone time. */
|
||||
keyRisks?: Array<{ risk?: string; whyItMatters?: string }>;
|
||||
/** Definition-of-done lines. */
|
||||
definitionOfDone?: string[];
|
||||
/** Freeform "requirement coverage" marker. */
|
||||
requirementCoverage?: string;
|
||||
/** Verification hints (contract/integration/operational/uat). */
|
||||
verificationContract?: string;
|
||||
verificationIntegration?: string;
|
||||
verificationOperational?: string;
|
||||
verificationUat?: string;
|
||||
}
|
||||
|
||||
export interface ScopeClassificationResult {
|
||||
variant: PipelineVariant;
|
||||
/** Short human-readable reasons, one per triggered signal. */
|
||||
reasons: string[];
|
||||
/** Sub-signals for telemetry / debugging. Stable across releases. */
|
||||
signals: {
|
||||
triggeredOverride: boolean;
|
||||
complexCount: number;
|
||||
trivialCount: number;
|
||||
fileCountHint: number | null;
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Keyword sets ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Override keywords that force `standard` (at minimum) regardless of
|
||||
* apparent triviality. Presence of any of these signals work that is
|
||||
* either security-sensitive, irreversible, or requires runtime verification
|
||||
* a "trivial" pipeline would skip.
|
||||
*
|
||||
 * Matched as case-insensitive substrings (no word-boundary anchoring; see containsAnyKeyword below). Conservative — err
|
||||
* on the side of including a keyword; over-classifying to `standard` costs
|
||||
* units, under-classifying could ship broken auth/security/migration work.
|
||||
*/
|
||||
const OVERRIDE_KEYWORDS: ReadonlyArray<string> = [
|
||||
// Security-sensitive
|
||||
"security", "auth", "authn", "authz", "authentication", "authorization",
|
||||
"credential", "secret", "password", "token", "oauth", "encrypt", "decrypt",
|
||||
"vulnerability", "exploit", "permission", "rbac", "acl",
|
||||
// Data-migration / irreversible
|
||||
"migration", "migrate", "schema change", "data migration",
|
||||
"backfill", "drop column", "drop table",
|
||||
// Compliance / regulatory
|
||||
"compliance", "gdpr", "hipaa", "soc2", "pci",
|
||||
// Infra / deploy — runtime verification needed
|
||||
"deploy", "rollout", "canary", "production database",
|
||||
];
|
||||
|
||||
/**
|
||||
* Keywords that contribute to `complex` classification on their own.
|
||||
* Different from OVERRIDE_KEYWORDS in that a single match bumps to
|
||||
* complex, not just to standard.
|
||||
*/
|
||||
const COMPLEX_KEYWORDS: ReadonlyArray<string> = [
|
||||
"multi-service", "distributed", "consensus", "saga", "eventual consistency",
|
||||
"breaking change", "api contract change", "schema redesign",
|
||||
"architect", "architecture", "refactor core",
|
||||
];
|
||||
|
||||
/**
|
||||
* Trivial-signal keywords: presence strongly suggests a simple, contained
|
||||
* deliverable. Only effective when combined with low file count / no tests
|
||||
* / no override keywords.
|
||||
*/
|
||||
const TRIVIAL_KEYWORDS: ReadonlyArray<string> = [
|
||||
"single file", "one file", "static html", "static page",
|
||||
"one-page", "landing page", "readme", "docs only", "typo", "rename",
|
||||
"spelling", "comment", "changelog",
|
||||
// Browser-only / no-build deliverable shapes (b23 forensic case).
|
||||
"pure html", "browser-based", "no build step", "no build tooling",
|
||||
"localstorage", "client-only", "no backend", "no server", "no backend.",
|
||||
];
|
||||
|
||||
// ─── Heuristics ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Estimate how many distinct files the milestone will touch, based on
|
||||
* explicit mentions in the input text. Returns `null` when no hint is
|
||||
* discoverable — callers should treat that as "unknown, no signal."
|
||||
*/
|
||||
function extractFileCountHint(text: string): number | null {
|
||||
// Explicit phrasing: "a single file", "two files", "3 files"
|
||||
const singleFileMatch = /\b(a|one|single)\s+(file|page)\b/i.test(text);
|
||||
if (singleFileMatch) return 1;
|
||||
|
||||
const digitMatch = text.match(/\b(\d+)\s+files?\b/i);
|
||||
if (digitMatch) {
|
||||
const n = parseInt(digitMatch[1], 10);
|
||||
if (!Number.isNaN(n)) return n;
|
||||
}
|
||||
|
||||
const wordMatch = text.match(/\b(two|three|four|five|six|seven|eight|nine|ten)\s+files?\b/i);
|
||||
if (wordMatch) {
|
||||
const wordMap: Record<string, number> = {
|
||||
two: 2, three: 3, four: 4, five: 5,
|
||||
six: 6, seven: 7, eight: 8, nine: 9, ten: 10,
|
||||
};
|
||||
return wordMap[wordMatch[1].toLowerCase()] ?? null;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
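For reference, what the three branches above return on typical phrasing:

```ts
extractFileCountHint("ship it as a single file");    // 1    ("a/one/single file|page")
extractFileCountHint("touches three files in src/"); // 3    (number words two..ten)
extractFileCountHint("refactor across 12 files");    // 12   (digit form)
extractFileCountHint("improve onboarding docs");     // null (no hint, i.e. "unknown, no signal")
```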
|
||||
|
||||
function containsAnyKeyword(haystack: string, keywords: ReadonlyArray<string>): string[] {
|
||||
const lower = haystack.toLowerCase();
|
||||
const hits: string[] = [];
|
||||
for (const kw of keywords) {
|
||||
// Substring match, not word-boundary — keyword list is curated so that
|
||||
// substring hits rarely overmatch. Phrases like "no authentication" still
|
||||
// match "authentication" and force standard — that's the safe direction.
|
||||
if (lower.includes(kw)) hits.push(kw);
|
||||
}
|
||||
return hits;
|
||||
}
|
||||
|
||||
/**
|
||||
* True when `term` appears in the text without an immediately preceding
|
||||
* negator (no / without / not / zero / skip) in the same clause. Used to
|
||||
* keep phrases like "no backend" or "no tests" from flipping a trivial-
|
||||
* class milestone to standard. Best-effort; imperfect English parsing,
|
||||
 * biased toward treating the term as present when in doubt (a missed negation
|
||||
 * simply routes the milestone to standard, the safe pipeline).
|
||||
*/
|
||||
function mentionsWithoutNegation(text: string, term: string): boolean {
|
||||
const lower = text.toLowerCase();
|
||||
const termPattern = new RegExp(String.raw`\b${term}\b`, "gi");
|
||||
const matches = Array.from(lower.matchAll(termPattern));
|
||||
for (const m of matches) {
|
||||
const start = m.index ?? 0;
|
||||
const windowStart = Math.max(0, start - 30);
|
||||
const window = lower.slice(windowStart, start);
|
||||
// Negator anywhere in the 30-char lookback window counts as negation —
|
||||
// covers "no backend", "without a server", "not using api", "zero
|
||||
// dependencies on an api". If a sentence break intervenes between the
|
||||
// negator and the term, treat as a different clause (positive mention).
|
||||
const hasNegator = /(^|[^a-z0-9])(no|without|not|zero|skip(s|ping)?|drops?)\b/i.test(window);
|
||||
const hasSentenceBreak = /[.;!?]/.test(window);
|
||||
if (hasNegator && !hasSentenceBreak) continue;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
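How the 30-character lookback window behaves on typical milestone prose:

```ts
mentionsWithoutNegation("pure html, no backend, no build step", "backend"); // false: "no" negates in the same clause
mentionsWithoutNegation("talks to the billing backend", "backend");         // true:  positive mention
mentionsWithoutNegation("No server. Backend comes later.", "backend");      // true:  sentence break separates the negator
mentionsWithoutNegation("zero dependencies on an api", "api");              // false: "zero" counts as a negator
```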
|
||||
|
||||
function mentionsTests(haystack: string): boolean {
|
||||
return mentionsWithoutNegation(haystack, "test")
|
||||
|| mentionsWithoutNegation(haystack, "tests")
|
||||
|| mentionsWithoutNegation(haystack, "testing")
|
||||
|| mentionsWithoutNegation(haystack, "spec")
|
||||
|| mentionsWithoutNegation(haystack, "unit test")
|
||||
|| mentionsWithoutNegation(haystack, "integration test");
|
||||
}
|
||||
|
||||
function mentionsBackend(haystack: string): boolean {
|
||||
return mentionsWithoutNegation(haystack, "api")
|
||||
|| mentionsWithoutNegation(haystack, "backend")
|
||||
|| mentionsWithoutNegation(haystack, "server")
|
||||
|| mentionsWithoutNegation(haystack, "database")
|
||||
|| mentionsWithoutNegation(haystack, "endpoint");
|
||||
}
|
||||
|
||||
// ─── Public API ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Classify a milestone's pipeline variant based on its planning inputs.
|
||||
*
|
||||
* Precedence:
|
||||
* 1. Override keyword → `standard` (at minimum). Prevents trivial
|
||||
* misclassification of security / auth / migration work.
|
||||
* 2. Complex-signal keyword OR ≥ 8 file hint OR architecture/refactor-core
|
||||
* language → `complex`.
|
||||
* 3. Trivial-signal keyword AND ≤ 2 file hint AND no tests mentioned AND
|
||||
* no backend mentioned → `trivial`.
|
||||
* 4. Otherwise → `standard`.
|
||||
*
|
||||
* Ambiguity → `standard` (today's default). Safe to run the full pipeline.
|
||||
*/
|
||||
export function classifyMilestoneScope(input: MilestoneScopeInput): ScopeClassificationResult {
|
||||
const haystack = [
|
||||
input.title ?? "",
|
||||
input.vision ?? "",
|
||||
(input.successCriteria ?? []).join("\n"),
|
||||
(input.keyRisks ?? []).map(r => `${r.risk ?? ""} ${r.whyItMatters ?? ""}`).join("\n"),
|
||||
(input.definitionOfDone ?? []).join("\n"),
|
||||
input.requirementCoverage ?? "",
|
||||
input.verificationContract ?? "",
|
||||
input.verificationIntegration ?? "",
|
||||
input.verificationOperational ?? "",
|
||||
input.verificationUat ?? "",
|
||||
].join("\n");
|
||||
|
||||
const overrideHits = containsAnyKeyword(haystack, OVERRIDE_KEYWORDS);
|
||||
const complexHits = containsAnyKeyword(haystack, COMPLEX_KEYWORDS);
|
||||
const trivialHits = containsAnyKeyword(haystack, TRIVIAL_KEYWORDS);
|
||||
const fileCountHint = extractFileCountHint(haystack);
|
||||
const hasTests = mentionsTests(haystack);
|
||||
const hasBackend = mentionsBackend(haystack);
|
||||
|
||||
const reasons: string[] = [];
|
||||
|
||||
// Rule 2: complex-class signals. Evaluated before override because a
|
||||
// complex + override input should land in complex, not standard.
|
||||
if (complexHits.length > 0) {
|
||||
reasons.push(`complex keywords: ${complexHits.slice(0, 3).join(", ")}`);
|
||||
}
|
||||
if (fileCountHint !== null && fileCountHint >= 8) {
|
||||
reasons.push(`file count hint: ${fileCountHint}`);
|
||||
}
|
||||
|
||||
const isComplex = complexHits.length > 0 || (fileCountHint !== null && fileCountHint >= 8);
|
||||
|
||||
if (isComplex) {
|
||||
return {
|
||||
variant: "complex",
|
||||
reasons,
|
||||
signals: {
|
||||
triggeredOverride: overrideHits.length > 0,
|
||||
complexCount: complexHits.length,
|
||||
trivialCount: trivialHits.length,
|
||||
fileCountHint,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Rule 1: override keywords force standard.
|
||||
if (overrideHits.length > 0) {
|
||||
return {
|
||||
variant: "standard",
|
||||
reasons: [`override keywords: ${overrideHits.slice(0, 3).join(", ")}`],
|
||||
signals: {
|
||||
triggeredOverride: true,
|
||||
complexCount: complexHits.length,
|
||||
trivialCount: trivialHits.length,
|
||||
fileCountHint,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Rule 3: trivial signals — require ALL of: trivial-keyword, low file
|
||||
// hint (or nothing suggesting high count), no test mention, no backend
|
||||
// mention.
|
||||
const fileCountOk = fileCountHint === null || fileCountHint <= 2;
|
||||
const trivial =
|
||||
trivialHits.length > 0 &&
|
||||
fileCountOk &&
|
||||
!hasTests &&
|
||||
!hasBackend;
|
||||
|
||||
if (trivial) {
|
||||
reasons.push(`trivial keywords: ${trivialHits.slice(0, 3).join(", ")}`);
|
||||
if (fileCountHint !== null) reasons.push(`file count hint: ${fileCountHint}`);
|
||||
reasons.push("no tests mentioned", "no backend mentioned");
|
||||
return {
|
||||
variant: "trivial",
|
||||
reasons,
|
||||
signals: {
|
||||
triggeredOverride: false,
|
||||
complexCount: complexHits.length,
|
||||
trivialCount: trivialHits.length,
|
||||
fileCountHint,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Rule 4: fallback.
|
||||
return {
|
||||
variant: "standard",
|
||||
reasons: reasons.length > 0 ? reasons : ["no strong signals — default"],
|
||||
signals: {
|
||||
triggeredOverride: overrideHits.length > 0,
|
||||
complexCount: complexHits.length,
|
||||
trivialCount: trivialHits.length,
|
||||
fileCountHint,
|
||||
},
|
||||
};
|
||||
}
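// Illustrative usage only — a minimal sketch of how a caller might branch on
// the result (the surrounding pipeline wiring is assumed, not shown here):
//
//   const scope = classifyMilestoneScope({
//     title: "Fix README typo",
//     vision: "Correct spelling error in the installation section.",
//     successCriteria: ["Typo fixed"],
//   });
//   if (scope.variant === "trivial") {
//     // skip the heavyweight planning passes
//   } else if (scope.variant === "complex") {
//     // schedule the extended research/verification pipeline
//   } // otherwise run the standard pipeline
//   // scope.reasons and scope.signals carry the audit trail for telemetry.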
|
||||
|
|
@ -55,6 +55,10 @@ const SEMI_STATIC_LABELS = new Set([
|
|||
"prior-summaries",
|
||||
"project-context",
|
||||
"overrides",
|
||||
// KNOWLEDGE is milestone-scoped (stable within a session), so it belongs
|
||||
// in the cacheable prefix. See issue #4719.
|
||||
"knowledge",
|
||||
"project-knowledge",
|
||||
]);
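// For orientation only — the classifySection implementation is not part of this
// hunk; a label lookup of roughly this shape is what the issue #4719 regression
// tests assume (the "dynamic" fallback is taken from those tests):
//
//   const kind = SEMI_STATIC_LABELS.has(label) ? "semi-static" : /* ... */ "dynamic";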
|
||||
|
||||
/** Labels that change per-task */
|
||||
|
|
|
|||
|
|
@ -133,6 +133,8 @@ Print a structured depth summary in chat covering:
|
|||
|
||||
This is your audit trail. Print it — do not skip it.
|
||||
|
||||
The final gate is the only question in headless mode. It is not an exploratory question round. Ask it only after printing the compact depth summary, and only to confirm whether the already-investigated context is final enough to write or should remain a draft.
|
||||
|
||||
Before writing final `CONTEXT.md`, decide confidence:
|
||||
- **HIGH**: You have verified the project knowledge above from actual files/tests/research, and the milestone scope is specific enough for downstream agents. Call `ask_user_questions` once with question ID `depth_verification_{{milestoneId}}_confirm`; make the recommended first option "Proceed with final context (Recommended)" and the second option "Keep as draft". If the confirmed answer is not received, do not bypass the gate.
|
||||
- **MEDIUM or LOW**: Do not call the gate. Write `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT-DRAFT.md` with the evidence, assumptions, and open questions, then stop.
|
||||
|
|
|
|||
|
|
@ -53,12 +53,21 @@ For subsequent rounds, continue investigating between rounds — check docs, sea
|
|||
|
||||
Questions are organized into four layers. Each layer targets a specific depth dimension. At each layer: ask 1-3 open questions per round, investigate between rounds as needed, and gate before advancing.
|
||||
|
||||
**Question round shape:** Every question round must start with a compact progress header:
|
||||
- **Current understanding** — 2-5 bullets using the user's terminology and the evidence you just found
|
||||
- **Blocked decision** — the specific choice or uncertainty that prevents the next artifact from being strong
|
||||
- **Why these questions** — one sentence explaining how the answers advance the milestone, roadmap, or requirements
|
||||
|
||||
If an uncertainty is low-risk or would not change the next artifact, do not ask about it. Continue with a documented assumption instead.
|
||||
|
||||
**Default to open questions.** Use `ask_user_questions` only when there are 2-3 genuinely distinct paths with clear tradeoffs (e.g., "REST vs GraphQL" or "Postgres vs SQLite"). For nuanced design questions, ask in plain text and let the user explain.
|
||||
|
||||
**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for binary/ternary choices. Keep option labels short (3-5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.**
|
||||
|
||||
**If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1-3 focused questions. Wait for answers before asking the next round.
|
||||
|
||||
After each answer, summarize what materially changed in one concise sentence before continuing. Then update the working context, investigate any newly-opened unknown, and either advance to the next gate/artifact or ask the next focused round.
|
||||
|
||||
**Incremental persistence:** After every 2 question rounds (across any layer), silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` using `sf_summary_save` with `artifact_type: "CONTEXT-DRAFT"` and `milestone_id: "{{milestoneId}}"`. This protects confirmed work against session crashes. Do NOT mention this save to the user.
|
||||
|
||||
### Identify Work Type
|
||||
|
|
|
|||
|
|
@ -7,9 +7,10 @@ Rules:
|
|||
2. Read before edit.
|
||||
3. Prefer fixing authoritative artifacts over masking warnings.
|
||||
4. For missing summaries or UAT files, generate the real artifact from existing slice/task context when possible — do not leave placeholders if you can reconstruct the real content.
|
||||
5. After each repair cluster, verify the relevant invariant directly from disk.
|
||||
6. When done, rerun `/sf doctor {{doctorCommandSuffix}}` mentally by ensuring the remaining issue set for this scope is reduced or cleared.
|
||||
7. Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `sf_milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes.
|
||||
5. For a missing milestone `CONTEXT.md` when the milestone is already past `pre-planning` (phase is `executing`, `summarizing`, `validating-milestone`, or `completing-milestone`): the artifact was skipped during bootstrap and must be reconstructed before execution can resume. Read `PROJECT.md`, `REQUIREMENTS.md`, the milestone's `ROADMAP.md`, and any slice-level context on disk, then write `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT.md` with the real context. Do not leave a stub — the plan gate will reject it on the next cycle.
|
||||
6. After each repair cluster, verify the relevant invariant directly from disk.
|
||||
7. When done, rerun `/sf doctor {{doctorCommandSuffix}}` mentally by ensuring the remaining issue set for this scope is reduced or cleared.
|
||||
8. Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `sf_milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes.
|
||||
|
||||
## Doctor Summary
|
||||
|
||||
|
|
|
|||
|
|
@ -35,13 +35,20 @@ Ask **1–3 questions per round**. Keep each question focused on one of:
|
|||
- **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
|
||||
- **What external systems/services this touches** — APIs, databases, third-party services
|
||||
|
||||
**Understanding + progress preface:** Before each question round, write a compact progress header in chat:
|
||||
- **Current understanding** — 2–5 bullets using the user's terminology plus the evidence you just found
|
||||
- **Blocked decision** — the specific choice or uncertainty that prevents a strong context file or roadmap
|
||||
- **Why these questions** — one sentence explaining how the answers advance the milestone
|
||||
|
||||
If an uncertainty is low-risk or would not change the context file, do not ask about it. Continue with a documented assumption instead.
|
||||
|
||||
**Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing.
|
||||
|
||||
**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.**
|
||||
|
||||
**If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round.
|
||||
|
||||
After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
|
||||
After each answer, summarize what materially changed in one concise sentence, update your working understanding, investigate further if the answer opens a new unknown, then either continue to the next concrete artifact step or ask the next focused round.
|
||||
|
||||
### Round cadence
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,13 @@ Do **not** go deep — just enough that your questions reflect what's actually t
|
|||
|
||||
**Never fabricate or simulate user input.** Never generate fake transcript markers like `[User]`, `[Human]`, or `User:`. Ask one question round, then wait for the user's actual response before continuing.
|
||||
|
||||
**Understanding + progress preface:** Before each question round, write a compact progress header in chat:
|
||||
- **Current understanding** — 2–5 bullets using the user's terminology plus the evidence you just found
|
||||
- **Blocked decision** — the specific choice or uncertainty that prevents a strong slice context
|
||||
- **Why these questions** — one sentence explaining how the answers advance the slice
|
||||
|
||||
If an uncertainty is low-risk or would not change the slice context, do not ask about it. Continue with a documented assumption instead.
|
||||
|
||||
**If `{{structuredQuestionsAvailable}}` is `true`:** Ask **1–3 questions per round** using `ask_user_questions`. **Call `ask_user_questions` exactly once per turn — never make multiple calls with the same or overlapping questions. Wait for the user's response before asking the next round.**
|
||||
**If `{{structuredQuestionsAvailable}}` is `false`:** Ask **1–3 questions per round** in plain text. Number them and wait for the user's response before asking the next round.
|
||||
Keep each question focused on one of:
|
||||
|
|
@ -32,7 +39,7 @@ Keep each question focused on one of:
|
|||
- **Scope boundaries** — what is explicitly in vs out for this slice? What deferred to later?
|
||||
- **Feel and experience** — tone, responsiveness, feedback, transitions, what "done" feels like to the user
|
||||
|
||||
After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
|
||||
After each answer, summarize what materially changed in one concise sentence, update your working understanding, investigate further if the answer opens a new unknown, then either continue to the next concrete artifact step or ask the next focused round.
|
||||
|
||||
### Round cadence
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,19 @@ SF ships with bundled skills. Load the relevant skill file with the `read` tool
|
|||
- In enduring files, write current state only unless the file is explicitly historical.
|
||||
- **Never take outward-facing actions on GitHub (or any external service) without explicit user confirmation.** This includes: creating issues, closing issues, merging PRs, approving PRs, posting comments, pushing to remote branches, publishing packages, or any other action that affects state outside the local filesystem. Read-only operations (listing, viewing, diffing) are fine. Always present what you intend to do and get a clear "yes" before executing. **Non-bypassable:** If the user does not respond, gives an ambiguous answer, or `ask_user_questions` fails, you MUST re-ask — never rationalize past the block ("tool not responding, I'll proceed" is forbidden). A missing "yes" is a "no."
|
||||
|
||||
### Question Efficiency Contract
|
||||
|
||||
When you need user input, make the question round move the work forward:
|
||||
|
||||
- State current understanding in 2-5 concise bullets before asking.
|
||||
- Name the blocked decision: the one choice that cannot be resolved safely from code, docs, or reasonable assumptions.
|
||||
- Ask only 1-3 high-leverage questions, each tied to a decision that materially changes plan, context, proof, scope, integration, or risk.
|
||||
- Do not ask for facts you can infer by investigating; use tools first.
|
||||
- Prefer recommended defaults and short options when using `ask_user_questions`; include the impact/tradeoff in each option and keep a freeform "Other / let me explain" path.
|
||||
- If the answer would not change the next artifact or risk is low, continue with a documented assumption instead of blocking.
|
||||
- After each answer, summarize what changed, persist or update the relevant context/draft when appropriate, and move to the next concrete step or next focused question round.
|
||||
- Never ask a meta "ready?" question unless the depth gate or wrap-up criteria are satisfied.
|
||||
|
||||
If a `SF Skill Preferences` block is present below this contract, treat it as explicit durable guidance for which skills to use, prefer, or avoid during SF work. Follow it where it does not conflict with required SF artifact rules, verification requirements, or higher-priority system/developer instructions.
|
||||
|
||||
### Naming Convention
|
||||
|
|
|
|||
|
|
@ -627,4 +627,83 @@ Integration tests mock external services.
|
|||
|
||||
assert.strictEqual(result, '', 'empty content returns empty string');
|
||||
});
|
||||
|
||||
// ── Regression: issue #4719 — single-H2 with many H3 entries ──────────────
|
||||
// A KNOWLEDGE.md structured as one top-level H2 with many H3 entries must
|
||||
// filter at H3 granularity; otherwise one keyword match against the H2
|
||||
// header or first paragraph returns the entire file.
|
||||
test("single H2 with many H3 entries filters at H3 level (issue #4719)", async () => {
|
||||
const singleH2Knowledge = `# Project Knowledge
|
||||
|
||||
## Patterns
|
||||
|
||||
### Database: prepared statements
|
||||
Always use prepared statements with SQLite.
|
||||
|
||||
### API: versioned paths
|
||||
Use /v1/resource style versioning.
|
||||
|
||||
### Testing: node:test
|
||||
Prefer node:test over external frameworks.
|
||||
|
||||
### Deployment: blue-green
|
||||
Blue-green deployment for zero-downtime releases.
|
||||
`;
|
||||
|
||||
const result = await queryKnowledge(singleH2Knowledge, ['database']);
|
||||
|
||||
// Should include only the matching H3 entry, not the whole file
|
||||
assert.match(result, /Database: prepared statements/, 'includes matching H3 entry');
|
||||
assert.ok(
|
||||
!result.includes('API: versioned paths'),
|
||||
'does not include non-matching H3 entry',
|
||||
);
|
||||
assert.ok(
|
||||
!result.includes('Testing: node:test'),
|
||||
'does not include non-matching H3 entry',
|
||||
);
|
||||
assert.ok(
|
||||
!result.includes('Deployment: blue-green'),
|
||||
'does not include non-matching H3 entry',
|
||||
);
|
||||
// The returned payload must be dramatically smaller than the full content
|
||||
assert.ok(
|
||||
result.length < singleH2Knowledge.length / 2,
|
||||
`scoped result (${result.length} chars) should be <50% of full content (${singleH2Knowledge.length} chars)`,
|
||||
);
|
||||
});
|
||||
|
||||
test("single H2 with H3 entries returns empty when no H3 matches (issue #4719)", async () => {
|
||||
const singleH2Knowledge = `# Project Knowledge
|
||||
|
||||
## Patterns
|
||||
|
||||
### Database: prepared statements
|
||||
Always use prepared statements with SQLite.
|
||||
|
||||
### API: versioned paths
|
||||
Use /v1/resource style versioning.
|
||||
`;
|
||||
|
||||
const result = await queryKnowledge(singleH2Knowledge, ['nonexistent']);
|
||||
|
||||
assert.strictEqual(result, '', 'no H3 match returns empty string');
|
||||
});
|
||||
|
||||
test("falls back to H2 when no H3 headings exist at all", async () => {
|
||||
// Backwards-compat: files with only H2 topic headers must still filter.
|
||||
const h2OnlyKnowledge = `# Project Knowledge
|
||||
|
||||
## Database Patterns
|
||||
Use prepared statements.
|
||||
|
||||
## API Design
|
||||
REST with OpenAPI.
|
||||
`;
|
||||
|
||||
const result = await queryKnowledge(h2OnlyKnowledge, ['database']);
|
||||
|
||||
assert.match(result, /Database Patterns/, 'H2-only file falls back to H2 filtering');
|
||||
assert.ok(!result.includes('API Design'), 'non-matching H2 section excluded');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -839,6 +839,24 @@ describe("dispatch failure modes", () => {
|
|||
assert.equal((result as any).unitType, "discuss-milestone");
|
||||
});
|
||||
|
||||
test("dispatch: incomplete milestone roadmap re-runs plan-milestone instead of missing-slice stop", async () => {
|
||||
base = createFullFixture();
|
||||
openDatabase(join(base, ".sf", "sf.db"));
|
||||
|
||||
const ctx = buildDispatchCtx(base, "M001", {
|
||||
phase: "planning",
|
||||
activeSlice: null,
|
||||
activeTask: null,
|
||||
nextAction: "Milestone M001 roadmap is incomplete (missing vision alignment meeting). Re-run plan-milestone with a weighted vision alignment meeting before execution.",
|
||||
});
|
||||
|
||||
const result = await resolveDispatch(ctx);
|
||||
assert.equal(result.action, "dispatch");
|
||||
assert.equal((result as any).unitType, "plan-milestone");
|
||||
assert.equal((result as any).unitId, "M001");
|
||||
assert.equal((result as any).matchedRule, "planning (roadmap incomplete) → plan-milestone");
|
||||
});
|
||||
|
||||
test("dispatch: complete phase → stop with info level", async () => {
|
||||
base = createFullFixture();
|
||||
openDatabase(join(base, ".sf", "sf.db"));
|
||||
|
|
@ -862,11 +880,14 @@ describe("dispatch failure modes", () => {
|
|||
const runUatIdx = ruleNames.indexOf("run-uat (post-completion)");
|
||||
const uatGateIdx = ruleNames.indexOf("uat-verdict-gate (non-PASS blocks progression)");
|
||||
const executeIdx = ruleNames.indexOf("executing → execute-task");
|
||||
const repairIdx = ruleNames.indexOf("planning (roadmap incomplete) → plan-milestone");
|
||||
const planSliceIdx = ruleNames.indexOf("planning → plan-slice");
|
||||
|
||||
// summarizing should come before execute-task
|
||||
assert.ok(summarizeIdx < executeIdx, "summarizing rule should precede execute-task");
|
||||
// run-uat should come before uat-verdict-gate
|
||||
assert.ok(runUatIdx < uatGateIdx, "run-uat should precede uat-verdict-gate");
|
||||
assert.ok(repairIdx < planSliceIdx, "milestone-plan repair should precede slice planning");
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, rmSync, realpathSy
|
|||
import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { SF_ROOT_FILES, resolveSfRootFile } from '../paths.ts';
|
||||
import { inlineGsdRootFile } from '../auto-prompts.ts';
|
||||
import { inlineGsdRootFile, inlineKnowledgeBudgeted } from '../auto-prompts.ts';
|
||||
import { appendKnowledge } from '../files.ts';
|
||||
import { loadKnowledgeBlock } from '../bootstrap/system-context.ts';
|
||||
|
||||
|
|
@ -248,3 +248,95 @@ test('loadKnowledgeBlock: reports globalSizeKb above 4KB threshold', () => {
|
|||
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// ─── inlineKnowledgeBudgeted — issue #4719 ─────────────────────────────────
|
||||
// Milestone-phase prompts must not inject the full KNOWLEDGE.md. The budgeted
|
||||
// helper scopes by milestone-level keywords and caps the injected size.
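// For orientation only — a minimal sketch of the budget-capping step the tests
// below exercise. The helper name and slicing are assumptions; only the
// truncation-note wording comes from the assertions:
//
//   function capToBudget(text: string, maxChars: number): string {
//     if (text.length <= maxChars) return text;
//     const dropped = text.length - maxChars;
//     return `${text.slice(0, maxChars)}\n[...truncated ${dropped} chars; rerun with narrower scope if needed]`;
//   }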
|
||||
|
||||
test('inlineKnowledgeBudgeted: returns scoped H3 entries for single-H2 file', async () => {
|
||||
const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-')));
|
||||
const gsdDir = join(tmp, '.gsd');
|
||||
mkdirSync(gsdDir, { recursive: true });
|
||||
|
||||
const content = `# Project Knowledge
|
||||
|
||||
## Patterns
|
||||
|
||||
### Database: prepared statements
|
||||
Always use prepared statements with SQLite.
|
||||
|
||||
### API: versioned paths
|
||||
Use /v1/resource style versioning.
|
||||
|
||||
### Testing: node:test
|
||||
Prefer node:test over external frameworks.
|
||||
`;
|
||||
writeFileSync(join(gsdDir, 'KNOWLEDGE.md'), content);
|
||||
|
||||
const result = await inlineKnowledgeBudgeted(tmp, ['database']);
|
||||
assert.ok(result !== null, 'should return content');
|
||||
assert.ok(result!.includes('Database: prepared statements'), 'includes matching H3');
|
||||
assert.ok(!result!.includes('API: versioned paths'), 'excludes non-matching H3');
|
||||
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('inlineKnowledgeBudgeted: caps payload below budget for large files', async () => {
|
||||
const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-')));
|
||||
const gsdDir = join(tmp, '.gsd');
|
||||
mkdirSync(gsdDir, { recursive: true });
|
||||
|
||||
// Build a 200KB KNOWLEDGE with 500 H3 entries all matching 'shared'
|
||||
const entries = Array.from({ length: 500 }, (_, i) =>
|
||||
`### Entry ${i}: shared topic\n${'filler text '.repeat(30)}\n`,
|
||||
).join('\n');
|
||||
const content = `# Project Knowledge\n\n## Patterns\n\n${entries}`;
|
||||
writeFileSync(join(gsdDir, 'KNOWLEDGE.md'), content);
|
||||
|
||||
const BUDGET_CHARS = 30_000;
|
||||
const result = await inlineKnowledgeBudgeted(tmp, ['shared'], { maxChars: BUDGET_CHARS });
|
||||
assert.ok(result !== null, 'should return content');
|
||||
// Allow some overhead for header formatting, but must stay close to budget
|
||||
assert.ok(
|
||||
result!.length <= BUDGET_CHARS + 500,
|
||||
`payload ${result!.length} chars should be <= budget ${BUDGET_CHARS} (+overhead)`,
|
||||
);
|
||||
// Far smaller than the raw file
|
||||
assert.ok(
|
||||
result!.length < content.length / 4,
|
||||
`payload should be much smaller than full content (${content.length} chars)`,
|
||||
);
|
||||
assert.match(
|
||||
result!,
|
||||
/\[\.\.\.truncated \d+ chars; rerun with narrower scope if needed\]/,
|
||||
'should include truncation note when budget is exceeded',
|
||||
);
|
||||
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('inlineKnowledgeBudgeted: returns null when no KNOWLEDGE.md exists', async () => {
|
||||
const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-')));
|
||||
const gsdDir = join(tmp, '.gsd');
|
||||
mkdirSync(gsdDir, { recursive: true });
|
||||
|
||||
const result = await inlineKnowledgeBudgeted(tmp, ['database']);
|
||||
assert.strictEqual(result, null);
|
||||
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('inlineKnowledgeBudgeted: returns null when no entries match', async () => {
|
||||
const tmp = realpathSync(mkdtempSync(join(tmpdir(), 'gsd-knowledge-')));
|
||||
const gsdDir = join(tmp, '.gsd');
|
||||
mkdirSync(gsdDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(gsdDir, 'KNOWLEDGE.md'),
|
||||
'# Project Knowledge\n\n## Patterns\n\n### Database\nuse it\n',
|
||||
);
|
||||
|
||||
const result = await inlineKnowledgeBudgeted(tmp, ['nonexistent']);
|
||||
assert.strictEqual(result, null);
|
||||
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,188 @@
|
|||
// GSD-2 — #4781: classifier behavior matrix. Pure-function tests, no I/O.
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import {
|
||||
classifyMilestoneScope,
|
||||
type MilestoneScopeInput,
|
||||
} from "../milestone-scope-classifier.ts";
|
||||
|
||||
// ─── Classification matrix ────────────────────────────────────────────────
|
||||
|
||||
test("#4781 classifier: single static HTML to-do app → trivial (b23 forensic case)", () => {
|
||||
const input: MilestoneScopeInput = {
|
||||
title: "To-Do App",
|
||||
vision: "A minimal, clean browser-based to-do app. Pure HTML/CSS/JS, no build step, no backend. Tasks persist in localStorage.",
|
||||
successCriteria: [
|
||||
"Open index.html in any browser without a server",
|
||||
"Add tasks by typing and pressing Enter or clicking Add",
|
||||
"Mark tasks complete (toggleable)",
|
||||
"Delete individual tasks",
|
||||
"Tasks survive a page reload via localStorage",
|
||||
],
|
||||
};
|
||||
const r = classifyMilestoneScope(input);
|
||||
assert.strictEqual(r.variant, "trivial", `expected trivial, got ${r.variant} — reasons: ${r.reasons.join("; ")}`);
|
||||
assert.ok(r.reasons.some(s => s.includes("trivial keywords")), "should cite trivial keywords");
|
||||
});
|
||||
|
||||
test("#4781 classifier: readme typo fix → trivial", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Fix README typo",
|
||||
vision: "Correct spelling error in the installation section.",
|
||||
successCriteria: ["Typo fixed", "README renders correctly"],
|
||||
});
|
||||
assert.strictEqual(r.variant, "trivial");
|
||||
});
|
||||
|
||||
test("#4781 classifier: auth flow single file → standard (override beats trivial)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Add login",
|
||||
vision: "Implement authentication flow in a single file with OAuth credentials.",
|
||||
successCriteria: ["User can log in"],
|
||||
});
|
||||
assert.strictEqual(r.variant, "standard", `override should beat single-file signal. reasons: ${r.reasons.join("; ")}`);
|
||||
assert.ok(r.signals.triggeredOverride, "override signals should be flagged");
|
||||
assert.ok(r.reasons.some(s => s.includes("override keywords")));
|
||||
});
|
||||
|
||||
test("#4781 classifier: security review scope → standard (even if small)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Harden session tokens",
|
||||
vision: "Review and patch security vulnerability in one session token helper.",
|
||||
successCriteria: ["No XSS via token"],
|
||||
});
|
||||
assert.strictEqual(r.variant, "standard");
|
||||
assert.ok(r.signals.triggeredOverride);
|
||||
});
|
||||
|
||||
test("#4781 classifier: schema migration mentioned → complex (overrides override)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "User profile v2",
|
||||
vision: "Perform schema migration to split user.name into first_name and last_name across the users table.",
|
||||
successCriteria: ["Migration lands", "Existing rows backfilled", "Rollback path validated"],
|
||||
});
|
||||
  // "migration" lands in OVERRIDE_KEYWORDS, not COMPLEX_KEYWORDS, and nothing in
|
||||
  // this copy hits a complex keyword (e.g. "schema redesign", "breaking change").
|
||||
  // Complex is checked first, but with zero complex hits the override rule fires,
|
||||
  // so the result is standard — the correct safe behavior: migration is
|
||||
  // override-level, not complex-level.
|
||||
assert.strictEqual(r.variant, "standard", `reasons: ${r.reasons.join("; ")}`);
|
||||
});
|
||||
|
||||
test("#4781 classifier: architecture keyword → complex", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Redesign plugin registry",
|
||||
vision: "Refactor core architecture of the plugin registry to support versioned contracts.",
|
||||
});
|
||||
assert.strictEqual(r.variant, "complex");
|
||||
assert.ok(r.reasons.some(s => s.includes("complex keywords")));
|
||||
});
|
||||
|
||||
test("#4781 classifier: >=8 files hint → complex", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Multi-file refactor",
|
||||
vision: "Touch 12 files to extract shared helpers.",
|
||||
});
|
||||
assert.strictEqual(r.variant, "complex");
|
||||
assert.strictEqual(r.signals.fileCountHint, 12);
|
||||
});
|
||||
|
||||
test("#4781 classifier: backend API mention → standard (not trivial)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Health endpoint",
|
||||
vision: "Add a single-file API endpoint returning status.",
|
||||
successCriteria: ["/health returns 200"],
|
||||
});
|
||||
  // Single file, no override, but backend mentioned → not trivial
|
||||
assert.strictEqual(r.variant, "standard");
|
||||
});
|
||||
|
||||
test("#4781 classifier: tests mentioned → standard (not trivial)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Landing page",
|
||||
vision: "Ship a static one-page landing page with unit tests for the form validation.",
|
||||
});
|
||||
assert.strictEqual(r.variant, "standard", `reasons: ${r.reasons.join("; ")}`);
|
||||
});
|
||||
|
||||
test("#4781 classifier: ambiguous prose → standard (safe default)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Generic improvements",
|
||||
vision: "Make the system better.",
|
||||
successCriteria: ["It's better"],
|
||||
});
|
||||
assert.strictEqual(r.variant, "standard");
|
||||
assert.ok(r.reasons.includes("no strong signals — default"));
|
||||
});
|
||||
|
||||
test("#4781 classifier: empty input → standard (safe default)", () => {
|
||||
const r = classifyMilestoneScope({});
|
||||
assert.strictEqual(r.variant, "standard");
|
||||
});
|
||||
|
||||
// ─── Override precedence over trivial ──────────────────────────────────────
|
||||
|
||||
test("#4781 classifier: override + trivial keyword → standard (override wins)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Token rotation",
|
||||
vision: "Single file change to rotate the oauth token expiry schedule.",
|
||||
});
|
||||
// "single file" is trivial signal; "oauth" is override signal. Override wins.
|
||||
assert.strictEqual(r.variant, "standard");
|
||||
assert.ok(r.signals.triggeredOverride);
|
||||
});
|
||||
|
||||
test("#4781 classifier: complex + override → complex (complex wins, flagged)", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Auth service refactor",
|
||||
vision: "Refactor core authentication architecture across services.",
|
||||
});
|
||||
// Complex (architecture, refactor core) wins over override (auth).
|
||||
assert.strictEqual(r.variant, "complex");
|
||||
// Override still recorded in signals for telemetry.
|
||||
assert.ok(r.signals.triggeredOverride, "override hits should still be tracked in signals");
|
||||
});
|
||||
|
||||
// ─── File count hint extraction ───────────────────────────────────────────
|
||||
|
||||
test("#4781 classifier: 'a single file' hint parsed as 1", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Tweak",
|
||||
vision: "Update a single file to flip the copy.",
|
||||
});
|
||||
assert.strictEqual(r.signals.fileCountHint, 1);
|
||||
});
|
||||
|
||||
test("#4781 classifier: 'two files' hint parsed as 2", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Minor",
|
||||
vision: "Touch two files.",
|
||||
});
|
||||
assert.strictEqual(r.signals.fileCountHint, 2);
|
||||
});
|
||||
|
||||
test("#4781 classifier: '12 files' hint parsed as 12", () => {
|
||||
const r = classifyMilestoneScope({
|
||||
title: "Bulk",
|
||||
vision: "Update 12 files.",
|
||||
});
|
||||
assert.strictEqual(r.signals.fileCountHint, 12);
|
||||
});
|
||||
|
||||
// ─── Reasons surface useful debugging info ─────────────────────────────────
|
||||
|
||||
test("#4781 classifier: reasons array populated for every branch", () => {
|
||||
const branches: Array<[string, MilestoneScopeInput]> = [
|
||||
["trivial", { title: "Readme typo", vision: "Fix a single file typo." }],
|
||||
["standard (override)", { title: "Auth", vision: "Touch auth helper." }],
|
||||
["complex (keyword)", { title: "Arch", vision: "Refactor core system design." }],
|
||||
["complex (file count)", { title: "Bulk", vision: "Update 9 files." }],
|
||||
["standard (default)", { title: "Generic", vision: "General work." }],
|
||||
];
|
||||
for (const [label, input] of branches) {
|
||||
const r = classifyMilestoneScope(input);
|
||||
assert.ok(r.reasons.length > 0, `${label}: reasons must not be empty`);
|
||||
}
|
||||
});
|
||||
|
|
@ -64,6 +64,18 @@ describe("prompt-cache-optimizer: classifySection", () => {
|
|||
assert.equal(classifySection("overrides"), "semi-static");
|
||||
});
|
||||
|
||||
// Regression: issue #4719 — KNOWLEDGE falls through to dynamic default.
|
||||
// Knowledge content is reused across all tasks within a milestone, so it
|
||||
// must be classified as semi-static to qualify for prefix caching when the
|
||||
// cache optimizer is wired into the prompt path.
|
||||
it("classifies knowledge as semi-static (issue #4719)", () => {
|
||||
assert.equal(classifySection("knowledge"), "semi-static");
|
||||
});
|
||||
|
||||
it("classifies project-knowledge as semi-static (issue #4719)", () => {
|
||||
assert.equal(classifySection("project-knowledge"), "semi-static");
|
||||
});
|
||||
|
||||
it("classifies task-plan as dynamic", () => {
|
||||
assert.equal(classifySection("task-plan"), "dynamic");
|
||||
});
|
||||
|
|
|
|||
|
|
@ -57,6 +57,15 @@ test("system prompt hard rules forbid fabricating user responses", () => {
|
|||
assert.match(prompt, /ask_user_questions.*only valid structured user input/i);
|
||||
});
|
||||
|
||||
test("system prompt makes question rounds efficient and progress-oriented", () => {
|
||||
const prompt = readPrompt("system");
|
||||
assert.match(prompt, /Question Efficiency Contract/i);
|
||||
assert.match(prompt, /State current understanding in 2-5 concise bullets/i);
|
||||
assert.match(prompt, /Name the blocked decision/i);
|
||||
assert.match(prompt, /continue with a documented assumption instead of blocking/i);
|
||||
assert.match(prompt, /After each answer, summarize what changed/i);
|
||||
});
|
||||
|
||||
test("discuss prompt allows implementation questions when they materially matter", () => {
|
||||
const prompt = readPrompt("discuss");
|
||||
assert.match(prompt, /Lead with experience, but ask implementation when it materially matters/i);
|
||||
|
|
@ -77,6 +86,29 @@ test("guided discussion prompts avoid wrap-up prompts after every round", () =>
|
|||
assert.match(slicePrompt, /Never fabricate or simulate user input/i);
|
||||
});
|
||||
|
||||
test("guided discussion prompts require understanding and progress before questions", () => {
|
||||
const milestonePrompt = readPrompt("guided-discuss-milestone");
|
||||
const slicePrompt = readPrompt("guided-discuss-slice");
|
||||
assert.match(milestonePrompt, /Understanding \+ progress preface/i);
|
||||
assert.match(slicePrompt, /Understanding \+ progress preface/i);
|
||||
assert.match(milestonePrompt, /Current understanding/i);
|
||||
assert.match(slicePrompt, /Current understanding/i);
|
||||
assert.match(milestonePrompt, /Blocked decision/i);
|
||||
assert.match(slicePrompt, /Blocked decision/i);
|
||||
assert.match(milestonePrompt, /After each answer, summarize what materially changed/i);
|
||||
assert.match(slicePrompt, /After each answer, summarize what materially changed/i);
|
||||
});
|
||||
|
||||
test("discuss prompt keeps each question round tied to progress", () => {
|
||||
const prompt = readPrompt("discuss");
|
||||
assert.match(prompt, /Question round shape/i);
|
||||
assert.match(prompt, /Current understanding/i);
|
||||
assert.match(prompt, /Blocked decision/i);
|
||||
assert.match(prompt, /Why these questions/i);
|
||||
assert.match(prompt, /documented assumption/i);
|
||||
assert.match(prompt, /After each answer, summarize what materially changed/i);
|
||||
});
|
||||
|
||||
test("guided milestone discussion scopes depth verification to the milestone id", () => {
|
||||
const prompt = readPrompt("guided-discuss-milestone");
|
||||
assert.match(prompt, /depth_verification_\{\{milestoneId\}\}/, "depth verification id should include the milestone id");
|
||||
|
|
@ -104,6 +136,14 @@ test("headless milestone creation preserves depth gate and draft fallback", () =
|
|||
assert.doesNotMatch(prompt, /\*\*DO NOT ask the user any questions\*\*/);
|
||||
});
|
||||
|
||||
test("headless milestone creation uses one final question gate, not exploratory questions", () => {
|
||||
const prompt = readPrompt("discuss-headless");
|
||||
assert.match(prompt, /The final gate is the only question in headless mode/i);
|
||||
assert.match(prompt, /not an exploratory question round/i);
|
||||
assert.match(prompt, /compact depth summary/i);
|
||||
assert.match(prompt, /write or should remain a draft/i);
|
||||
});
|
||||
|
||||
test("queue prompt requires waiting for user response between rounds", () => {
|
||||
const prompt = readPrompt("queue");
|
||||
assert.match(prompt, /Never fabricate or simulate user input during this discussion/i);
|
||||
|
|
|
|||
|
|
@ -225,6 +225,7 @@ import {
|
|||
setPendingGate,
|
||||
clearPendingGate,
|
||||
getPendingGate,
|
||||
loadWriteGateSnapshot,
|
||||
} from '../bootstrap/write-gate.ts';
|
||||
|
||||
// ─── Scenario 19: isGateQuestionId recognizes all gate patterns ──
|
||||
|
|
@ -333,6 +334,8 @@ test('write-gate: shouldBlockPendingGateBash allows read-only commands during pe
|
|||
assert.strictEqual(shouldBlockPendingGateBash('git log --oneline', 'M001').block, false);
|
||||
assert.strictEqual(shouldBlockPendingGateBash('grep -r pattern .', 'M001').block, false);
|
||||
assert.strictEqual(shouldBlockPendingGateBash('ls -la', 'M001').block, false);
|
||||
assert.strictEqual(shouldBlockPendingGateBash('npm run test', 'M001').block, false);
|
||||
assert.strictEqual(shouldBlockPendingGateBash('npm run typecheck', 'M001').block, false);
|
||||
|
||||
clearDiscussionFlowState();
|
||||
});
|
||||
|
|
@ -367,6 +370,27 @@ test('write-gate: resetWriteGateState clears pending gate', () => {
|
|||
assert.strictEqual(getPendingGate(), null);
|
||||
});
|
||||
|
||||
test('write-gate: persisted snapshot deletion clears hard block when persistence is enabled', () => {
|
||||
const previous = process.env.SF_PERSIST_WRITE_GATE_STATE;
|
||||
process.env.SF_PERSIST_WRITE_GATE_STATE = '1';
|
||||
try {
|
||||
setPendingGate('depth_verification');
|
||||
const snapshot = loadWriteGateSnapshot(`/tmp/sf-write-gate-missing-${process.pid}`);
|
||||
assert.deepStrictEqual(snapshot, {
|
||||
verifiedDepthMilestones: [],
|
||||
activeQueuePhase: false,
|
||||
pendingGateId: null,
|
||||
});
|
||||
} finally {
|
||||
if (previous === undefined) {
|
||||
delete process.env.SF_PERSIST_WRITE_GATE_STATE;
|
||||
} else {
|
||||
process.env.SF_PERSIST_WRITE_GATE_STATE = previous;
|
||||
}
|
||||
clearDiscussionFlowState();
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Standard options fixture used across depth confirmation tests ──
|
||||
|
||||
const STANDARD_OPTIONS = [
|
||||
|
|
|
|||
|
|
@ -7,13 +7,17 @@ import { isDbAvailable, getMilestoneSlices, getSliceTasks, type SliceRow } from
|
|||
import type { UokGraphNode } from "./contracts.js";
|
||||
|
||||
const PLAN_V2_CLARIFY_ROUND_LIMIT = 3;
|
||||
const EXECUTION_ENTRY_PHASES: ReadonlySet<Phase> = new Set([
|
||||
export const EXECUTION_ENTRY_PHASES: ReadonlySet<Phase> = new Set([
|
||||
"executing",
|
||||
"summarizing",
|
||||
"validating-milestone",
|
||||
"completing-milestone",
|
||||
]);
|
||||
|
||||
export function isExecutionEntryPhase(phase: Phase): boolean {
|
||||
return EXECUTION_ENTRY_PHASES.has(phase);
|
||||
}
|
||||
|
||||
export interface PlanV2CompileResult {
|
||||
ok: boolean;
|
||||
reason?: string;
|
||||
|
|
@ -48,10 +52,6 @@ function countSliceResearchArtifacts(basePath: string, milestoneId: string, slic
|
|||
return count;
|
||||
}
|
||||
|
||||
function isExecutionEntryPhase(phase: Phase): boolean {
|
||||
return EXECUTION_ENTRY_PHASES.has(phase);
|
||||
}
|
||||
|
||||
export function compileUnitGraphFromState(basePath: string, state: SFState): PlanV2CompileResult {
|
||||
const mid = state.activeMilestone?.id;
|
||||
if (!mid) return { ok: false, reason: "no active milestone" };
|
||||
|
|
@ -146,6 +146,14 @@ export function compileUnitGraphFromState(basePath: string, state: SFState): Pla
|
|||
};
|
||||
}
|
||||
|
||||
export function hasFinalizedMilestoneContext(basePath: string, milestoneId: string): boolean {
|
||||
return hasFileContent(resolveMilestoneFile(basePath, milestoneId, "CONTEXT"));
|
||||
}
|
||||
|
||||
export function isMissingFinalizedContextResult(result: PlanV2CompileResult): boolean {
|
||||
return !result.ok && result.finalizedContextIncluded === false;
|
||||
}
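// Illustrative only — one way a dispatcher could combine these helpers to detect
// the "past pre-planning but CONTEXT.md missing" repair case (field names and the
// routing target are assumptions, not shown in this diff):
//
//   const compiled = compileUnitGraphFromState(basePath, state);
//   const phase = state.activeMilestone?.phase;
//   if (phase && isExecutionEntryPhase(phase) && isMissingFinalizedContextResult(compiled)) {
//     // reconstruct the milestone CONTEXT.md before resuming execution
//   }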
|
||||
|
||||
export function ensurePlanV2Graph(basePath: string, state: SFState): PlanV2CompileResult {
|
||||
const compiled = compileUnitGraphFromState(basePath, state);
|
||||
if (!compiled.ok) return compiled;
|
||||
|
|
|
|||
83
src/tests/resource-loader-content-hash.test.ts
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
|
||||
/**
|
||||
* Regression test for gsd-build/gsd-2 #4787.
|
||||
*
|
||||
* Background: `computeResourceFingerprint` previously hashed the relative
|
||||
* file path + file size only. Same-byte-length edits to bundled prompt
|
||||
* templates (e.g. the #4570 retry-cap fix to parallel-research-slices.md)
|
||||
* slipped through the fingerprint gate in `initResources`, so existing
|
||||
* installs silently kept serving the stale cached copy from
|
||||
* `~/.gsd/agent/extensions/gsd/prompts/`.
|
||||
*
|
||||
* The fix hashes file CONTENTS (sha256) instead of just size — any edit,
|
||||
* regardless of length, produces a different fingerprint and triggers a
|
||||
* resync on next launch.
|
||||
*/
|
||||
|
||||
test("computeResourceFingerprint detects same-size content edits (#4787)", async (t) => {
|
||||
const { computeResourceFingerprint } = await import("../resource-loader.ts");
|
||||
|
||||
const tmp = mkdtempSync(join(tmpdir(), "gsd-fingerprint-content-"));
|
||||
t.after(() => { rmSync(tmp, { recursive: true, force: true }); });
|
||||
|
||||
const dirA = join(tmp, "bundled-a");
|
||||
const dirB = join(tmp, "bundled-b");
|
||||
mkdirSync(join(dirA, "prompts"), { recursive: true });
|
||||
mkdirSync(join(dirB, "prompts"), { recursive: true });
|
||||
|
||||
// Same byte length (32 bytes each), different content — mirrors the
|
||||
// real-world #4787 scenario where a hotfix edit keeps the file size
|
||||
// stable but changes load-bearing instructions.
|
||||
const contentA = "retry subagent once then BLOCKER"; // 32 bytes
|
||||
const contentB = "retry subagent forever never stp"; // 32 bytes
|
||||
assert.equal(Buffer.byteLength(contentA), Buffer.byteLength(contentB));
|
||||
|
||||
writeFileSync(join(dirA, "prompts", "foo.md"), contentA);
|
||||
writeFileSync(join(dirB, "prompts", "foo.md"), contentB);
|
||||
|
||||
const hashA = computeResourceFingerprint(dirA);
|
||||
const hashB = computeResourceFingerprint(dirB);
|
||||
|
||||
assert.notEqual(
|
||||
hashA,
|
||||
hashB,
|
||||
"same-size, different-content trees must yield different fingerprints",
|
||||
);
|
||||
});
|
||||
|
||||
test("syncResourceDir overwrites same-size stale content on refresh (#4787)", async (t) => {
|
||||
const { syncResourceDir } = await import("../resource-loader.ts");
|
||||
|
||||
const tmp = mkdtempSync(join(tmpdir(), "gsd-sync-samesize-"));
|
||||
t.after(() => { rmSync(tmp, { recursive: true, force: true }); });
|
||||
|
||||
const bundled = join(tmp, "bundled", "prompts");
|
||||
const installed = join(tmp, "installed", "prompts");
|
||||
mkdirSync(bundled, { recursive: true });
|
||||
mkdirSync(installed, { recursive: true });
|
||||
|
||||
// Bundled (new): the post-#4570 fix template
|
||||
const newContent = "retry subagent once then BLOCKER";
|
||||
// Installed (stale): pre-#4570 template with the same byte length
|
||||
const staleContent = "retry subagent forever never stp";
|
||||
assert.equal(Buffer.byteLength(newContent), Buffer.byteLength(staleContent));
|
||||
|
||||
writeFileSync(join(bundled, "parallel-research-slices.md"), newContent);
|
||||
writeFileSync(join(installed, "parallel-research-slices.md"), staleContent);
|
||||
|
||||
  // syncResourceDir always force-copies; this test guards that the copy path
|
||||
  // itself overwrites the installed file even when sizes match.
|
||||
syncResourceDir(join(tmp, "bundled"), join(tmp, "installed"));
|
||||
|
||||
const actual = readFileSync(join(installed, "parallel-research-slices.md"), "utf-8");
|
||||
assert.equal(
|
||||
actual,
|
||||
newContent,
|
||||
"installed prompt must be overwritten with bundled content even when sizes match",
|
||||
);
|
||||
});
|
||||