singularity-forge/src/resources/extensions/sf/tests/integration/auto-recovery.test.ts
Mikael Hugo d73a73d7f3 chore: node 24 native APIs, import.meta.dirname, parsers rename, dep updates
- Replace fileURLToPath(import.meta.url) with import.meta.dirname across
  scripts and extensions
- Rename parsers-legacy.ts → parsers.ts
- Remove deleted plan/spec docs (cicd-pipeline)
- Update package.json engines and deps across workspace packages
- Update web/package-lock.json

💘 Generated with Crush

Assisted-by: GLM-5.1 via Crush <crush@charm.land>
2026-05-02 06:18:25 +02:00

1433 lines
42 KiB
TypeScript

import assert from "node:assert/strict";
import { execFileSync } from "node:child_process";
import { randomUUID } from "node:crypto";
import {
chmodSync,
existsSync,
mkdirSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test, afterEach } from 'vitest';
import {
buildLoopRemediationSteps,
diagnoseExpectedArtifact,
hasImplementationArtifacts,
reconcileMergeState,
resolveExpectedArtifactPath,
verifyExpectedArtifact,
} from "../../auto-recovery.ts";
import { invalidateAllCaches } from "../../cache.ts";
import { clearParseCache, parseTaskPlanFile } from "../../files.ts";
import { renderPlanFromDb } from "../../markdown-renderer.ts";
import { parsePlan, parseRoadmap } from "../../parsers.ts";
import {
closeDatabase,
insertMilestone,
insertSlice,
insertTask,
openDatabase,
} from "../../sf-db.ts";
import { deriveState, invalidateStateCache } from "../../state.ts";
/**
 * Create a unique temp workspace for one test, pre-seeded with the nested
 * `.sf/milestones/M001/slices/S01/tasks/` directory structure most tests need.
 * Returns the workspace root; callers are responsible for removing it.
 */
function makeTmpBase(): string {
  const root = join(tmpdir(), `sf-test-${randomUUID()}`);
  const taskDir = join(
    root,
    ".sf",
    "milestones",
    "M001",
    "slices",
    "S01",
    "tasks",
  );
  mkdirSync(taskDir, { recursive: true });
  return root;
}
/** Best-effort recursive removal of a test workspace; never throws. */
function cleanup(base: string): void {
  try {
    rmSync(base, { force: true, recursive: true });
  } catch {
    /* */
  }
}
// ─── resolveExpectedArtifactPath ──────────────────────────────────────────
// NOTE: afterEach() must not be called from inside a running vitest test —
// hooks registered mid-test either throw or leak across unrelated tests.
// Each test instead owns its temp dir via try/finally, which also runs
// cleanup when an assertion fails.
test("resolveExpectedArtifactPath returns correct path for research-milestone", () => {
  const base = makeTmpBase();
  try {
    const result = resolveExpectedArtifactPath(
      "research-milestone",
      "M001",
      base,
    );
    assert.ok(result);
    assert.ok(result!.includes("M001"));
    assert.ok(result!.includes("RESEARCH"));
  } finally {
    cleanup(base);
  }
});
test("resolveExpectedArtifactPath returns correct path for execute-task", () => {
  const base = makeTmpBase();
  try {
    const result = resolveExpectedArtifactPath(
      "execute-task",
      "M001/S01/T01",
      base,
    );
    assert.ok(result);
    assert.ok(result!.includes("tasks"));
    assert.ok(result!.includes("SUMMARY"));
  } finally {
    cleanup(base);
  }
});
test("resolveExpectedArtifactPath returns correct path for complete-slice", () => {
  const base = makeTmpBase();
  try {
    const result = resolveExpectedArtifactPath(
      "complete-slice",
      "M001/S01",
      base,
    );
    assert.ok(result);
    assert.ok(result!.includes("SUMMARY"));
  } finally {
    cleanup(base);
  }
});
test("resolveExpectedArtifactPath returns correct path for plan-slice", () => {
  const base = makeTmpBase();
  try {
    const result = resolveExpectedArtifactPath("plan-slice", "M001/S01", base);
    assert.ok(result);
    assert.ok(result!.includes("PLAN"));
  } finally {
    cleanup(base);
  }
});
test("resolveExpectedArtifactPath returns null for unknown type", () => {
  const base = makeTmpBase();
  try {
    const result = resolveExpectedArtifactPath("unknown-type", "M001", base);
    assert.equal(result, null);
  } finally {
    cleanup(base);
  }
});
test("resolveExpectedArtifactPath returns correct path for all milestone-level types", () => {
  const base = makeTmpBase();
  try {
    const planResult = resolveExpectedArtifactPath(
      "plan-milestone",
      "M001",
      base,
    );
    assert.ok(planResult);
    assert.ok(planResult!.includes("ROADMAP"));
    const completeResult = resolveExpectedArtifactPath(
      "complete-milestone",
      "M001",
      base,
    );
    assert.ok(completeResult);
    assert.ok(completeResult!.includes("SUMMARY"));
  } finally {
    cleanup(base);
  }
});
test("resolveExpectedArtifactPath returns correct path for all slice-level types", () => {
  const base = makeTmpBase();
  try {
    const researchResult = resolveExpectedArtifactPath(
      "research-slice",
      "M001/S01",
      base,
    );
    assert.ok(researchResult);
    assert.ok(researchResult!.includes("RESEARCH"));
    const assessResult = resolveExpectedArtifactPath(
      "reassess-roadmap",
      "M001/S01",
      base,
    );
    assert.ok(assessResult);
    assert.ok(assessResult!.includes("ASSESSMENT"));
    const uatResult = resolveExpectedArtifactPath("run-uat", "M001/S01", base);
    assert.ok(uatResult);
    assert.ok(uatResult!.includes("ASSESSMENT"));
  } finally {
    cleanup(base);
  }
});
// ─── run-uat artifact path contract (#2873) ──────────────────────────────
test("resolveExpectedArtifactPath for run-uat returns ASSESSMENT path, not UAT (#2873)", () => {
  // The run-uat prompt instructs the agent to call sf_summary_save with
  // artifact_type: "ASSESSMENT", which writes S##-ASSESSMENT.md. The artifact
  // verification path must match — otherwise verification fails and auto-mode
  // retries the unit in an infinite loop.
  const base = makeTmpBase();
  try {
    const result = resolveExpectedArtifactPath("run-uat", "M001/S01", base);
    assert.ok(result, "run-uat should resolve to a non-null artifact path");
    assert.ok(
      result!.endsWith("S01-ASSESSMENT.md"),
      `run-uat artifact path should end with S01-ASSESSMENT.md, got: ${result}`,
    );
  } finally {
    cleanup(base);
  }
});
test("diagnoseExpectedArtifact for run-uat references ASSESSMENT (#2873)", () => {
  const base = makeTmpBase();
  try {
    const diag = diagnoseExpectedArtifact("run-uat", "M001/S01", base);
    assert.ok(diag, "run-uat should have a diagnostic message");
    assert.ok(
      diag!.includes("ASSESSMENT"),
      `run-uat diagnostic should reference ASSESSMENT, got: ${diag}`,
    );
  } finally {
    cleanup(base);
  }
});
test("verifyExpectedArtifact passes for run-uat when ASSESSMENT file exists (#2873)", () => {
  // Regression test: run-uat writes S##-ASSESSMENT.md via sf_summary_save,
  // but verification looked for S##-UAT.md, causing false stuck retries.
  const base = makeTmpBase();
  try {
    // Write the ASSESSMENT file (what sf_summary_save actually produces)
    const assessPath = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "S01-ASSESSMENT.md",
    );
    writeFileSync(assessPath, "---\nverdict: PASS\n---\n# UAT Assessment\n");
    const verified = verifyExpectedArtifact("run-uat", "M001/S01", base);
    assert.ok(
      verified,
      "verifyExpectedArtifact should pass when ASSESSMENT file exists",
    );
  } finally {
    cleanup(base);
  }
});
// ─── diagnoseExpectedArtifact ─────────────────────────────────────────────
test("diagnoseExpectedArtifact returns description for known types", () => {
  const base = makeTmpBase();
  try {
    const research = diagnoseExpectedArtifact(
      "research-milestone",
      "M001",
      base,
    );
    assert.ok(research);
    assert.ok(research!.includes("research"));
    const plan = diagnoseExpectedArtifact("plan-slice", "M001/S01", base);
    assert.ok(plan);
    assert.ok(plan!.includes("plan"));
    const task = diagnoseExpectedArtifact("execute-task", "M001/S01/T01", base);
    assert.ok(task);
    assert.ok(task!.includes("T01"));
  } finally {
    cleanup(base);
  }
});
test("diagnoseExpectedArtifact returns null for unknown type", () => {
  const base = makeTmpBase();
  try {
    assert.equal(diagnoseExpectedArtifact("unknown", "M001", base), null);
  } finally {
    cleanup(base);
  }
});
// ─── buildLoopRemediationSteps ────────────────────────────────────────────
test("buildLoopRemediationSteps returns steps for execute-task", () => {
  const base = makeTmpBase();
  try {
    const steps = buildLoopRemediationSteps(
      "execute-task",
      "M001/S01/T01",
      base,
    );
    assert.ok(steps);
    assert.ok(steps!.includes("T01"));
    assert.ok(steps!.includes("sf undo-task"));
  } finally {
    cleanup(base);
  }
});
test("buildLoopRemediationSteps returns steps for plan-slice", () => {
  const base = makeTmpBase();
  try {
    const steps = buildLoopRemediationSteps("plan-slice", "M001/S01", base);
    assert.ok(steps);
    assert.ok(steps!.includes("PLAN"));
    assert.ok(steps!.includes("sf recover"));
  } finally {
    cleanup(base);
  }
});
test("buildLoopRemediationSteps returns steps for complete-slice", () => {
  const base = makeTmpBase();
  try {
    const steps = buildLoopRemediationSteps("complete-slice", "M001/S01", base);
    assert.ok(steps);
    assert.ok(steps!.includes("S01"));
    assert.ok(steps!.includes("sf reset-slice"));
  } finally {
    cleanup(base);
  }
});
test("buildLoopRemediationSteps returns null for unknown type", () => {
  const base = makeTmpBase();
  try {
    assert.equal(buildLoopRemediationSteps("unknown", "M001", base), null);
  } finally {
    cleanup(base);
  }
});
// ─── verifyExpectedArtifact: parse cache collision regression ─────────────
test("verifyExpectedArtifact detects roadmap [x] change despite parse cache", () => {
  // Regression test: cacheKey collision when [ ] → [x] doesn't change
  // file length or first/last 100 chars. Without the fix, parseRoadmap
  // returns stale cached data with done=false even though the file has [x].
  const base = makeTmpBase();
  try {
    // Build a roadmap long enough that the [x] change is outside the first/last 100 chars
    const padding = "A".repeat(200);
    const roadmapBefore = [
      `# M001: Test Milestone ${padding}`,
      "",
      "## Slices",
      "",
      "- [ ] **S01: First slice** `risk:low`",
      "",
      `## Footer ${padding}`,
    ].join("\n");
    const roadmapAfter = roadmapBefore.replace("- [ ] **S01:", "- [x] **S01:");
    // Verify lengths are identical (the key collision condition)
    assert.equal(roadmapBefore.length, roadmapAfter.length);
    // Populate parse cache with the pre-edit roadmap
    const before = parseRoadmap(roadmapBefore);
    const sliceBefore = before.slices.find((s) => s.id === "S01");
    assert.ok(sliceBefore);
    assert.equal(sliceBefore!.done, false);
    // Now write the post-edit roadmap to disk and create required artifacts
    const roadmapPath = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "M001-ROADMAP.md",
    );
    writeFileSync(roadmapPath, roadmapAfter);
    const summaryPath = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "S01-SUMMARY.md",
    );
    writeFileSync(summaryPath, "# Summary\nDone.");
    const uatPath = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "S01-UAT.md",
    );
    writeFileSync(uatPath, "# UAT\nPassed.");
    // verifyExpectedArtifact should see the [x] despite the parse cache
    // having the [ ] version. The fix clears the parse cache inside verify.
    const verified = verifyExpectedArtifact("complete-slice", "M001/S01", base);
    assert.equal(
      verified,
      true,
      "verifyExpectedArtifact should return true when roadmap has [x]",
    );
  } finally {
    // Teardown must reset the shared parse cache so later tests start cold.
    clearParseCache();
    cleanup(base);
  }
});
// ─── verifyExpectedArtifact: plan-slice empty scaffold regression (#699) ──
test("verifyExpectedArtifact rejects plan-slice with empty scaffold", () => {
  const base = makeTmpBase();
  try {
    const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
    mkdirSync(sliceDir, { recursive: true });
    writeFileSync(
      join(sliceDir, "S01-PLAN.md"),
      "# S01: Test Slice\n\n## Tasks\n\n",
    );
    assert.strictEqual(
      verifyExpectedArtifact("plan-slice", "M001/S01", base),
      false,
      "Empty scaffold should not be treated as completed artifact",
    );
  } finally {
    cleanup(base);
  }
});
test("verifyExpectedArtifact accepts plan-slice with actual tasks", () => {
  const base = makeTmpBase();
  try {
    const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
    const tasksDir = join(sliceDir, "tasks");
    mkdirSync(tasksDir, { recursive: true });
    writeFileSync(
      join(sliceDir, "S01-PLAN.md"),
      [
        "# S01: Test Slice",
        "",
        "## Adversarial Review",
        "",
        "### Partner Review",
        "",
        "The task list is concrete enough to execute.",
        "",
        "### Combatant Review",
        "",
        "A scaffold without review would be too weak, so this fixture includes the required pushback.",
        "",
        "### Architect Review",
        "",
        "The plan is valid only because it can hand off execution with real task artifacts.",
        "",
        "## Tasks",
        "",
        "- [ ] **T01: Implement feature** `est:2h`",
        "- [ ] **T02: Write tests** `est:1h`",
      ].join("\n"),
    );
    writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01 Plan");
    writeFileSync(join(tasksDir, "T02-PLAN.md"), "# T02 Plan");
    assert.strictEqual(
      verifyExpectedArtifact("plan-slice", "M001/S01", base),
      true,
      "Plan with task entries should be treated as completed artifact",
    );
  } finally {
    cleanup(base);
  }
});
test("verifyExpectedArtifact accepts plan-slice with completed tasks", () => {
  const base = makeTmpBase();
  try {
    const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
    const tasksDir = join(sliceDir, "tasks");
    mkdirSync(tasksDir, { recursive: true });
    writeFileSync(
      join(sliceDir, "S01-PLAN.md"),
      [
        "# S01: Test Slice",
        "",
        "## Adversarial Review",
        "",
        "### Partner Review",
        "",
        "Checked tasks should still count as valid plan entries.",
        "",
        "### Combatant Review",
        "",
        "Completion markers alone are not enough without the review block.",
        "",
        "### Architect Review",
        "",
        "The fixture should still represent a complete handoff artifact.",
        "",
        "## Tasks",
        "",
        "- [x] **T01: Implement feature** `est:2h`",
        "- [ ] **T02: Write tests** `est:1h`",
      ].join("\n"),
    );
    writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01 Plan");
    writeFileSync(join(tasksDir, "T02-PLAN.md"), "# T02 Plan");
    assert.strictEqual(
      verifyExpectedArtifact("plan-slice", "M001/S01", base),
      true,
      "Plan with completed task entries should be treated as completed artifact",
    );
  } finally {
    cleanup(base);
  }
});
// ─── verifyExpectedArtifact: plan-slice task plan check (#739) ────────────
test("verifyExpectedArtifact plan-slice passes when all task plan files exist", () => {
  const base = makeTmpBase();
  try {
    const tasksDir = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "tasks",
    );
    const planPath = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "S01-PLAN.md",
    );
    const planContent = [
      "# S01: Test Slice",
      "",
      "## Adversarial Review",
      "",
      "### Partner Review",
      "",
      "The slice has concrete tasks and matching task plans.",
      "",
      "### Combatant Review",
      "",
      "Missing task plan files must still fail even when the slice plan looks complete.",
      "",
      "### Architect Review",
      "",
      "This keeps artifact verification strict at both the slice and task layer.",
      "",
      "## Tasks",
      "",
      "- [ ] **T01: First task** `est:1h`",
      "- [ ] **T02: Second task** `est:2h`",
    ].join("\n");
    writeFileSync(planPath, planContent);
    writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01 Plan\n\nDo the thing.");
    writeFileSync(
      join(tasksDir, "T02-PLAN.md"),
      "# T02 Plan\n\nDo the other thing.",
    );
    const result = verifyExpectedArtifact("plan-slice", "M001/S01", base);
    assert.equal(result, true, "should pass when all task plan files exist");
  } finally {
    cleanup(base);
  }
});
test("verifyExpectedArtifact plan-slice fails when a task plan file is missing (#739)", () => {
  const base = makeTmpBase();
  try {
    const tasksDir = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "tasks",
    );
    const planPath = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "S01-PLAN.md",
    );
    const planContent = [
      "# S01: Test Slice",
      "",
      "## Adversarial Review",
      "",
      "### Partner Review",
      "",
      "The slice plan is otherwise valid, so the missing task plan should be the deciding failure.",
      "",
      "### Combatant Review",
      "",
      "A valid slice plan should not mask missing task artifacts.",
      "",
      "### Architect Review",
      "",
      "This keeps the regression narrowly focused on per-task plan existence.",
      "",
      "## Tasks",
      "",
      "- [ ] **T01: First task** `est:1h`",
      "- [ ] **T02: Second task** `est:2h`",
    ].join("\n");
    writeFileSync(planPath, planContent);
    // Only write T01-PLAN.md — T02 is missing
    writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01 Plan\n\nDo the thing.");
    const result = verifyExpectedArtifact("plan-slice", "M001/S01", base);
    assert.equal(result, false, "should fail when T02-PLAN.md is missing");
  } finally {
    cleanup(base);
  }
});
test("verifyExpectedArtifact plan-slice fails for plan with no tasks (#699)", () => {
  const base = makeTmpBase();
  try {
    const planPath = join(
      base,
      ".sf",
      "milestones",
      "M001",
      "slices",
      "S01",
      "S01-PLAN.md",
    );
    const planContent = [
      "# S01: Test Slice",
      "",
      "## Goal",
      "",
      "Just some documentation updates, no tasks.",
    ].join("\n");
    writeFileSync(planPath, planContent);
    const result = verifyExpectedArtifact("plan-slice", "M001/S01", base);
    assert.equal(
      result,
      false,
      "should fail when plan has no task entries (empty scaffold, #699)",
    );
  } finally {
    cleanup(base);
  }
});
// ─── verifyExpectedArtifact: heading-style plan tasks (#1691) ─────────────
test("verifyExpectedArtifact accepts plan-slice with heading-style tasks (### T01 --)", () => {
  const base = makeTmpBase();
  try {
    const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
    const tasksDir = join(sliceDir, "tasks");
    mkdirSync(tasksDir, { recursive: true });
    writeFileSync(
      join(sliceDir, "S01-PLAN.md"),
      [
        "# S01: Test Slice",
        "",
        "## Adversarial Review",
        "",
        "### Partner Review",
        "",
        "Heading-style task plans are valid if the review is still present.",
        "",
        "### Combatant Review",
        "",
        "The parser must not confuse heading-style tasks with a shallow scaffold.",
        "",
        "### Architect Review",
        "",
        "Allowing both plan formats keeps migration compatibility without weakening the gate.",
        "",
        "## Tasks",
        "",
        "### T01 -- Implement feature",
        "",
        "Feature description.",
        "",
        "### T02 -- Write tests",
        "",
        "Test description.",
      ].join("\n"),
    );
    writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01 Plan");
    writeFileSync(join(tasksDir, "T02-PLAN.md"), "# T02 Plan");
    assert.strictEqual(
      verifyExpectedArtifact("plan-slice", "M001/S01", base),
      true,
      "Heading-style plan with task entries should be treated as completed artifact",
    );
  } finally {
    cleanup(base);
  }
});
test("verifyExpectedArtifact accepts plan-slice with colon-style heading tasks (### T01:)", () => {
  const base = makeTmpBase();
  try {
    const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
    const tasksDir = join(sliceDir, "tasks");
    mkdirSync(tasksDir, { recursive: true });
    writeFileSync(
      join(sliceDir, "S01-PLAN.md"),
      [
        "# S01: Test Slice",
        "",
        "## Adversarial Review",
        "",
        "### Partner Review",
        "",
        "Colon-style heading tasks are also valid when the plan is otherwise complete.",
        "",
        "### Combatant Review",
        "",
        "This guards against accepting bare headings without review.",
        "",
        "### Architect Review",
        "",
        "The validation rule stays format-flexible but review-strict.",
        "",
        "## Tasks",
        "",
        "### T01: Implement feature",
        "",
        "Feature description.",
      ].join("\n"),
    );
    writeFileSync(join(tasksDir, "T01-PLAN.md"), "# T01 Plan");
    assert.strictEqual(
      verifyExpectedArtifact("plan-slice", "M001/S01", base),
      true,
      "Colon heading-style plan should be treated as completed artifact",
    );
  } finally {
    cleanup(base);
  }
});
test("verifyExpectedArtifact execute-task rejects heading-style plan without checked checkbox (#3607)", () => {
  const base = makeTmpBase();
  try {
    const sliceDir = join(base, ".sf", "milestones", "M001", "slices", "S01");
    const tasksDir = join(sliceDir, "tasks");
    mkdirSync(tasksDir, { recursive: true });
    writeFileSync(
      join(sliceDir, "S01-PLAN.md"),
      [
        "# S01: Test Slice",
        "",
        "## Tasks",
        "",
        "### T01 -- Implement feature",
        "",
        "Feature description.",
      ].join("\n"),
    );
    writeFileSync(join(tasksDir, "T01-SUMMARY.md"), "# T01 Summary\n\nDone.");
    // Heading-style entries no longer count as verified — only checked
    // checkboxes prove sf_complete_task ran (#3607).
    assert.strictEqual(
      verifyExpectedArtifact("execute-task", "M001/S01/T01", base),
      false,
      "heading-style without checked checkbox should NOT pass verification",
    );
  } finally {
    cleanup(base);
  }
});
// End-to-end: plans rendered from DB rows must satisfy plan-slice artifact
// verification (slice plan parses into tasks + one PLAN file per task on disk).
test("verifyExpectedArtifact plan-slice passes for rendered slice/task plan artifacts from DB", async () => {
  const base = makeTmpBase();
  const dbPath = join(base, ".sf", "sf.db");
  // Open the DB outside try so the finally below always has a handle to close.
  openDatabase(dbPath);
  try {
    insertMilestone({ id: "M001", title: "Milestone", status: "active" });
    insertSlice({
      id: "S01",
      milestoneId: "M001",
      title: "Rendered slice",
      status: "pending",
      demo: "Rendered plan artifacts exist.",
      planning: {
        goal: "Render plans from DB rows.",
        successCriteria: "- Slice plan parses\n- Task plan files exist on disk",
        proofLevel: "integration",
        integrationClosure:
          "DB rows are the source of truth for PLAN artifacts.",
        observabilityImpact:
          "- Recovery verification fails if a task plan file is missing",
        // The adversarial review block is required plan content; without it
        // verification rejects the slice plan (see the review-missing test).
        adversarialReview: {
          partner:
            "The DB-backed renderer already owns the slice contract, so persisting the review in the same row keeps plan intent and artifact generation aligned.",
          combatant:
            "A shallow plan could still render. The recovery gate must reject missing review data so auto-mode does not treat scaffolding as execution-ready.",
          architect:
            "This keeps plan quality enforcement on the same boundary as artifact verification instead of relying on prompt discipline alone.",
        },
      },
    });
    insertTask({
      id: "T01",
      sliceId: "S01",
      milestoneId: "M001",
      title: "Render plan",
      status: "pending",
      planning: {
        description: "Create the slice plan from DB state.",
        estimate: "30m",
        files: ["src/resources/extensions/sf/markdown-renderer.ts"],
        verify: "node --test markdown-renderer.test.ts",
        inputs: ["src/resources/extensions/sf/sf-db.ts"],
        expectedOutput: [
          "src/resources/extensions/sf/tests/markdown-renderer.test.ts",
        ],
        observabilityImpact: "Renderer tests cover the failure mode.",
      },
    });
    insertTask({
      id: "T02",
      sliceId: "S01",
      milestoneId: "M001",
      title: "Verify recovery",
      status: "pending",
      planning: {
        description: "Prove task plan files remain present for recovery.",
        estimate: "20m",
        files: ["src/resources/extensions/sf/auto-recovery.ts"],
        verify: "node --test auto-recovery.test.ts",
        inputs: ["src/resources/extensions/sf/auto-recovery.ts"],
        expectedOutput: [
          "src/resources/extensions/sf/tests/auto-recovery.test.ts",
        ],
        observabilityImpact:
          "Missing plan files surface as explicit verification failures.",
      },
    });
    // Render PLAN markdown artifacts from the rows inserted above.
    const rendered = await renderPlanFromDb(base, "M001", "S01");
    assert.ok(
      existsSync(rendered.planPath),
      "renderPlanFromDb should write the slice plan",
    );
    assert.equal(
      rendered.taskPlanPaths.length,
      2,
      "renderPlanFromDb should render one task plan per task",
    );
    // Round-trip check: the rendered markdown must parse back into tasks.
    const planContent = readFileSync(rendered.planPath, "utf-8");
    const parsedPlan = parsePlan(planContent);
    assert.equal(
      parsedPlan.tasks.length,
      2,
      "rendered slice plan should parse into task entries",
    );
    const taskPlanContent = readFileSync(rendered.taskPlanPaths[0], "utf-8");
    const taskPlan = parseTaskPlanFile(taskPlanContent);
    assert.deepEqual(
      taskPlan.frontmatter.skills_used,
      [],
      "rendered task plans should use conservative empty skills_used",
    );
    const result = verifyExpectedArtifact("plan-slice", "M001/S01", base);
    assert.equal(
      result,
      true,
      "plan-slice verification should pass when rendered task plan files exist",
    );
  } finally {
    // Always release the DB handle and remove the temp workspace.
    closeDatabase();
    cleanup(base);
  }
});
// Counterpart to the passing rendered-plan test: deleting one rendered
// per-task PLAN file must flip plan-slice verification to false.
test("verifyExpectedArtifact plan-slice fails after deleting a rendered task plan file", async () => {
  const base = makeTmpBase();
  const dbPath = join(base, ".sf", "sf.db");
  // Open the DB outside try so the finally below always has a handle to close.
  openDatabase(dbPath);
  try {
    insertMilestone({ id: "M001", title: "Milestone", status: "active" });
    insertSlice({
      id: "S01",
      milestoneId: "M001",
      title: "Rendered slice",
      status: "pending",
      demo: "Rendered plan artifacts exist.",
      planning: {
        goal: "Render plans from DB rows.",
        successCriteria: "- Slice plan parses\n- Task plan files exist on disk",
        proofLevel: "integration",
        integrationClosure:
          "DB rows are the source of truth for PLAN artifacts.",
        observabilityImpact:
          "- Recovery verification fails if a task plan file is missing",
        adversarialReview: {
          partner:
            "A rendered plan should remain the canonical artifact for downstream recovery checks.",
          combatant:
            "Deleting a task plan file should still fail verification even when the slice plan itself looks complete.",
          architect:
            "This keeps recovery sensitive to both plan completeness and task artifact completeness.",
        },
      },
    });
    insertTask({
      id: "T01",
      sliceId: "S01",
      milestoneId: "M001",
      title: "Render plan",
      status: "pending",
      planning: {
        description: "Create the slice plan from DB state.",
        estimate: "30m",
        files: ["src/resources/extensions/sf/markdown-renderer.ts"],
        verify: "node --test markdown-renderer.test.ts",
        inputs: ["src/resources/extensions/sf/sf-db.ts"],
        expectedOutput: [
          "src/resources/extensions/sf/tests/markdown-renderer.test.ts",
        ],
        observabilityImpact: "Renderer tests cover the failure mode.",
      },
    });
    insertTask({
      id: "T02",
      sliceId: "S01",
      milestoneId: "M001",
      title: "Verify recovery",
      status: "pending",
      planning: {
        description: "Prove task plan files remain present for recovery.",
        estimate: "20m",
        files: ["src/resources/extensions/sf/auto-recovery.ts"],
        verify: "node --test auto-recovery.test.ts",
        inputs: ["src/resources/extensions/sf/auto-recovery.ts"],
        expectedOutput: [
          "src/resources/extensions/sf/tests/auto-recovery.test.ts",
        ],
        observabilityImpact:
          "Missing plan files surface as explicit verification failures.",
      },
    });
    const rendered = await renderPlanFromDb(base, "M001", "S01");
    // Remove the second task's rendered PLAN file to simulate a lost artifact.
    rmSync(rendered.taskPlanPaths[1]);
    const result = verifyExpectedArtifact("plan-slice", "M001/S01", base);
    assert.equal(
      result,
      false,
      "plan-slice verification should fail when a rendered task plan file is removed",
    );
  } finally {
    // Always release the DB handle and remove the temp workspace.
    closeDatabase();
    cleanup(base);
  }
});
// A plan with tasks but no "## Adversarial Review" section must be rejected.
test("verifyExpectedArtifact plan-slice fails when adversarial review is missing", () => {
  const base = makeTmpBase();
  try {
    writeFileSync(
      join(base, ".sf", "milestones", "M001", "slices", "S01", "S01-PLAN.md"),
      [
        "# S01: First Slice",
        "",
        "**Goal:** Test plan quality.",
        "**Demo:** Task artifacts exist.",
        "",
        "## Tasks",
        "",
        "- [ ] **T01: Do thing**",
        " - Files: `src/example.ts`",
        " - Verify: `npm test`",
      ].join("\n"),
    );
    writeFileSync(
      join(
        base,
        ".sf",
        "milestones",
        "M001",
        "slices",
        "S01",
        "tasks",
        "T01-PLAN.md",
      ),
      "# T01 PLAN\n",
    );
    const result = verifyExpectedArtifact("plan-slice", "M001/S01", base);
    assert.equal(
      result,
      false,
      "plan-slice verification should fail without adversarial review",
    );
  } finally {
    cleanup(base);
  }
});
// A plan whose Planning Meeting recommends routing back to "researching"
// must fail verification even though the plan otherwise has tasks and review.
test("verifyExpectedArtifact plan-slice fails when planning meeting routes back to researching", () => {
  const base = makeTmpBase();
  try {
    writeFileSync(
      join(base, ".sf", "milestones", "M001", "slices", "S01", "S01-PLAN.md"),
      [
        "# S01: First Slice",
        "",
        "**Goal:** Test plan quality.",
        "**Demo:** Task artifacts exist.",
        "",
        "## Adversarial Review",
        "",
        "### Partner Review",
        "",
        "The current plan could work if the premise holds.",
        "",
        "### Combatant Review",
        "",
        "The premise may still be wrong, so this should not execute yet.",
        "",
        "### Architect Review",
        "",
        "The route should stay conservative until the premise is rechecked.",
        "",
        "## Planning Meeting",
        "",
        "### Trigger",
        "",
        "Multiple plausible approaches remained after the first pass.",
        "",
        "### Product Manager",
        "",
        "The increment is still unclear enough that shipping now would be premature.",
        "",
        "### Researcher",
        "",
        "The current evidence does not yet narrow the best approach enough.",
        "",
        "### Partner",
        "",
        "One path looks viable if the assumptions hold.",
        "",
        "### Combatant",
        "",
        "Those assumptions are still too weak.",
        "",
        "### Architect",
        "",
        "The system boundary is not yet proven.",
        "",
        "### Moderator",
        "",
        "Return to research before planning execution.",
        "",
        "### Recommended Route",
        "",
        "researching",
        "",
        "### Confidence",
        "",
        "Post-meeting confidence is not high enough for execution planning.",
        "",
        "## Tasks",
        "",
        "- [ ] **T01: Do thing**",
        " - Files: `src/example.ts`",
        " - Verify: `npm test`",
      ].join("\n"),
    );
    writeFileSync(
      join(
        base,
        ".sf",
        "milestones",
        "M001",
        "slices",
        "S01",
        "tasks",
        "T01-PLAN.md",
      ),
      "# T01 PLAN\n",
    );
    const result = verifyExpectedArtifact("plan-slice", "M001/S01", base);
    assert.equal(
      result,
      false,
      "plan-slice verification should fail when the meeting routes back to researching",
    );
  } finally {
    cleanup(base);
  }
});
// ─── #793: invalidateAllCaches unblocks skip-loop ─────────────────────────
// When the skip-loop breaker fires, it must call invalidateAllCaches() (not
// just invalidateStateCache()) to clear path/parse caches that deriveState
// depends on. Without this, even after cache invalidation, deriveState reads
// stale directory listings and returns the same unit, looping forever.
test("#793: invalidateAllCaches clears all caches so deriveState sees fresh disk state", async () => {
  const base = makeTmpBase();
  try {
    const mid = "M001";
    const sid = "S01";
    const planDir = join(base, ".sf", "milestones", mid, "slices", sid);
    const tasksDir = join(planDir, "tasks");
    mkdirSync(tasksDir, { recursive: true });
    mkdirSync(join(base, ".sf", "milestones", mid), { recursive: true });
    writeFileSync(
      join(base, ".sf", "milestones", mid, `${mid}-ROADMAP.md`),
      `# M001: Test Milestone\n\n**Vision:** test.\n\n## Slices\n\n- [ ] **${sid}: Slice One** \`risk:low\` \`depends:[]\`\n > After this: done.\n`,
    );
    const planUnchecked = `# ${sid}: Slice One\n\n**Goal:** test.\n\n## Adversarial Review\n\n### Partner Review\n\nThe plan is concrete enough to execute and exercise cache invalidation.\n\n### Combatant Review\n\nA stale cache must not cause us to accept a plan that was never actually reread.\n\n### Architect Review\n\nThis fixture proves cache invalidation while still meeting the plan-quality contract.\n\n## Tasks\n\n- [ ] **T01: Task One** \`est:10m\`\n- [ ] **T02: Task Two** \`est:10m\`\n`;
    writeFileSync(join(planDir, `${sid}-PLAN.md`), planUnchecked);
    writeFileSync(
      join(tasksDir, "T01-PLAN.md"),
      "# T01: Task One\n\n**Goal:** t\n\n## Steps\n- step\n\n## Verification\n- v\n",
    );
    writeFileSync(
      join(tasksDir, "T02-PLAN.md"),
      "# T02: Task Two\n\n**Goal:** t\n\n## Steps\n- step\n\n## Verification\n- v\n",
    );
    // Warm all caches
    const state1 = await deriveState(base);
    assert.equal(state1.activeTask?.id, "T01", "initial: T01 is active");
    // Simulate task completion on disk (what the LLM does)
    const planChecked = `# ${sid}: Slice One\n\n**Goal:** test.\n\n## Adversarial Review\n\n### Partner Review\n\nThe plan remains valid after T01 completes.\n\n### Combatant Review\n\nThe cache invalidation path must prove we reread the updated plan rather than trusting stale state.\n\n### Architect Review\n\nThis keeps the regression focused on cache behavior, not on plan completeness.\n\n## Tasks\n\n- [x] **T01: Task One** \`est:10m\`\n- [ ] **T02: Task Two** \`est:10m\`\n`;
    writeFileSync(join(planDir, `${sid}-PLAN.md`), planChecked);
    writeFileSync(
      join(tasksDir, "T01-SUMMARY.md"),
      "---\nid: T01\n---\n# Summary\n",
    );
    // invalidateStateCache alone: _stateCache cleared but path/parse caches warm
    invalidateStateCache();
    // invalidateAllCaches: all caches cleared — deriveState must re-read disk
    invalidateAllCaches();
    const state2 = await deriveState(base);
    // After full invalidation, T01 should be complete and T02 should be next
    assert.notEqual(
      state2.activeTask?.id,
      "T01",
      "#793: T01 not re-dispatched after full invalidation",
    );
    // Verify the caches are truly cleared by calling clearParseCache and clearPathCache
    // do not throw (they should be no-ops after invalidateAllCaches already cleared them)
    clearParseCache(); // no-op, but should not throw
    assert.ok(true, "clearParseCache after invalidateAllCaches is safe");
  } finally {
    cleanup(base);
  }
});
// ─── hasImplementationArtifacts (#1703) ───────────────────────────────────
/**
 * Create a throwaway git repository with an initial commit so HEAD exists.
 * Returns the repo root; callers are responsible for removing it.
 */
function makeGitBase(): string {
  const repo = join(tmpdir(), `sf-test-git-${randomUUID()}`);
  mkdirSync(repo, { recursive: true });
  // Small helper so each git invocation shares cwd/stdio settings.
  const git = (...args: string[]): void => {
    execFileSync("git", args, { cwd: repo, stdio: "ignore" });
  };
  git("init", "--initial-branch=main");
  git("config", "user.email", "test@test.com");
  git("config", "user.name", "Test");
  // Create initial commit so HEAD exists
  writeFileSync(join(repo, ".gitkeep"), "");
  git("add", ".");
  git("commit", "-m", "initial");
  return repo;
}
// NOTE: vitest does not allow registering afterEach() from inside a running
// test (that was the node:test `t.after()` idiom); cleanup runs in `finally`.
test("hasImplementationArtifacts returns 'absent' when only .sf/ files committed (#1703)", () => {
  const base = makeGitBase();
  try {
    // Create a feature branch and commit only .sf/ files
    execFileSync("git", ["checkout", "-b", "feat/test-milestone"], {
      cwd: base,
      stdio: "ignore",
    });
    mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });
    writeFileSync(
      join(base, ".sf", "milestones", "M001", "M001-ROADMAP.md"),
      "# Roadmap",
    );
    writeFileSync(
      join(base, ".sf", "milestones", "M001", "M001-SUMMARY.md"),
      "# Summary",
    );
    execFileSync("git", ["add", "."], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "chore: add plan files"], {
      cwd: base,
      stdio: "ignore",
    });
    const result = hasImplementationArtifacts(base);
    assert.equal(
      result,
      "absent",
      "should return 'absent' when only .sf/ files were committed",
    );
  } finally {
    cleanup(base);
  }
});
// NOTE: vitest does not allow registering afterEach() from inside a running
// test; cleanup runs in `finally` instead.
test("hasImplementationArtifacts returns 'present' when implementation files committed (#1703)", () => {
  const base = makeGitBase();
  try {
    // Create a feature branch with both .sf/ and implementation files
    execFileSync("git", ["checkout", "-b", "feat/test-impl"], {
      cwd: base,
      stdio: "ignore",
    });
    mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });
    writeFileSync(
      join(base, ".sf", "milestones", "M001", "M001-ROADMAP.md"),
      "# Roadmap",
    );
    mkdirSync(join(base, "src"), { recursive: true });
    writeFileSync(
      join(base, "src", "feature.ts"),
      "export function feature() {}",
    );
    execFileSync("git", ["add", "."], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "feat: add feature"], {
      cwd: base,
      stdio: "ignore",
    });
    const result = hasImplementationArtifacts(base);
    assert.equal(
      result,
      "present",
      "should return 'present' when implementation files are present",
    );
  } finally {
    cleanup(base);
  }
});
// NOTE: vitest does not allow registering afterEach() from inside a running
// test; cleanup runs in `finally` instead.
test("hasImplementationArtifacts returns 'unknown' on non-git directory (fail-open)", () => {
  const base = join(tmpdir(), `sf-test-nogit-${randomUUID()}`);
  mkdirSync(base, { recursive: true });
  try {
    const result = hasImplementationArtifacts(base);
    assert.equal(
      result,
      "unknown",
      "should return 'unknown' (fail-open) in non-git directory",
    );
  } finally {
    cleanup(base);
  }
});
// ─── verifyExpectedArtifact: complete-milestone requires impl artifacts (#1703) ──
// NOTE: vitest does not allow registering afterEach() from inside a running
// test; cleanup runs in `finally` instead.
test("verifyExpectedArtifact complete-milestone fails with only .sf/ files (#1703)", () => {
  const base = makeGitBase();
  try {
    // Create feature branch with only .sf/ files
    execFileSync("git", ["checkout", "-b", "feat/ms-only-sf"], {
      cwd: base,
      stdio: "ignore",
    });
    mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });
    writeFileSync(
      join(base, ".sf", "milestones", "M001", "M001-SUMMARY.md"),
      "# Milestone Summary\nDone.",
    );
    execFileSync("git", ["add", "."], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "chore: milestone plan files"], {
      cwd: base,
      stdio: "ignore",
    });
    const result = verifyExpectedArtifact("complete-milestone", "M001", base);
    assert.equal(
      result,
      false,
      "complete-milestone should fail verification when only .sf/ files present",
    );
  } finally {
    cleanup(base);
  }
});
// ─── reconcileMergeState: silent nativeCommit failure (#2542) ─────────────
/**
 * Builds a minimal plugin-context stub whose `ui.notify()` records each
 * notification into the returned `notifications` array instead of displaying
 * it, so tests can assert on emitted messages and levels.
 */
function makeMockCtx(): {
  ctx: any;
  notifications: Array<{ msg: string; level: string }>;
} {
  const recorded: Array<{ msg: string; level: string }> = [];
  return {
    ctx: {
      ui: {
        notify: (msg: string, level: string) => {
          recorded.push({ msg, level });
        },
      },
    },
    notifications: recorded,
  };
}
// NOTE: vitest does not allow registering afterEach() from inside a running
// test; cleanup runs in `finally` instead. The chmod restore must happen
// before cleanup(), or rmSync cannot delete the read-only objects dir.
test("reconcileMergeState returns blocked and notifies error when nativeCommit fails (#2542)", () => {
  const base = makeGitBase();
  const objectsDir = join(base, ".git", "objects");
  try {
    // Create a second branch with a commit, then start a merge on main
    execFileSync("git", ["checkout", "-b", "feature"], {
      cwd: base,
      stdio: "ignore",
    });
    writeFileSync(join(base, "feature.txt"), "feature content");
    execFileSync("git", ["add", "."], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "add feature"], {
      cwd: base,
      stdio: "ignore",
    });
    execFileSync("git", ["checkout", "main"], { cwd: base, stdio: "ignore" });
    // Start merge (no conflicts — fast path with MERGE_HEAD)
    execFileSync("git", ["merge", "--no-ff", "--no-commit", "feature"], {
      cwd: base,
      stdio: "ignore",
    });
    // Verify MERGE_HEAD exists
    assert.ok(
      existsSync(join(base, ".git", "MERGE_HEAD")),
      "MERGE_HEAD should exist",
    );
    // Make .git/objects read-only so git cannot write the commit object,
    // causing nativeCommit to throw a non-"nothing to commit" error.
    chmodSync(objectsDir, 0o444);
    const { ctx, notifications } = makeMockCtx();
    const result = reconcileMergeState(base, ctx);
    assert.equal(
      result,
      "blocked",
      "reconcileMergeState should return blocked when nativeCommit fails",
    );
    const errorNotifications = notifications.filter((n) => n.level === "error");
    assert.ok(
      errorNotifications.length > 0,
      "should notify an error when nativeCommit fails",
    );
    assert.ok(
      errorNotifications[0].msg.includes("Failed to finalize"),
      "error notification should describe the commit failure",
    );
  } finally {
    // Restore write permission first so cleanup can remove the tree.
    try {
      chmodSync(objectsDir, 0o755);
    } catch {
      /* cleanup */
    }
    cleanup(base);
  }
});
// NOTE: vitest does not allow registering afterEach() from inside a running
// test; cleanup runs in `finally` instead.
test("reconcileMergeState returns clean when no merge state present", () => {
  const base = makeGitBase();
  try {
    const { ctx, notifications } = makeMockCtx();
    const result = reconcileMergeState(base, ctx);
    assert.equal(
      result,
      "clean",
      "should return clean when no merge state exists",
    );
    assert.equal(
      notifications.length,
      0,
      "should not notify when no merge state present",
    );
  } finally {
    cleanup(base);
  }
});
// NOTE: vitest does not allow registering afterEach() from inside a running
// test; cleanup runs in `finally` instead.
test("reconcileMergeState blocks and preserves unresolved code conflicts", () => {
  const base = makeGitBase();
  try {
    // Build a guaranteed conflict: main and feature edit the same line.
    writeFileSync(join(base, "conflict.txt"), "base\n");
    execFileSync("git", ["add", "conflict.txt"], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "add conflict base"], {
      cwd: base,
      stdio: "ignore",
    });
    execFileSync("git", ["checkout", "-b", "feature"], {
      cwd: base,
      stdio: "ignore",
    });
    writeFileSync(join(base, "conflict.txt"), "feature\n");
    execFileSync("git", ["add", "conflict.txt"], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "feature change"], {
      cwd: base,
      stdio: "ignore",
    });
    execFileSync("git", ["checkout", "main"], { cwd: base, stdio: "ignore" });
    writeFileSync(join(base, "conflict.txt"), "main\n");
    execFileSync("git", ["add", "conflict.txt"], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "main change"], {
      cwd: base,
      stdio: "ignore",
    });
    let mergeFailed = false;
    try {
      execFileSync("git", ["merge", "--no-ff", "feature"], {
        cwd: base,
        stdio: "ignore",
      });
    } catch {
      mergeFailed = true;
    }
    assert.equal(mergeFailed, true, "merge should produce a conflict");
    assert.ok(
      existsSync(join(base, ".git", "MERGE_HEAD")),
      "MERGE_HEAD should remain present before reconcile",
    );
    const beforeContents = readFileSync(join(base, "conflict.txt"), "utf8");
    assert.match(
      beforeContents,
      /<<<<<<<|=======|>>>>>>>/,
      "fixture should contain conflict markers",
    );
    const { ctx, notifications } = makeMockCtx();
    const result = reconcileMergeState(base, ctx);
    assert.equal(result, "blocked", "code conflicts should block reconciliation");
    assert.ok(
      existsSync(join(base, ".git", "MERGE_HEAD")),
      "MERGE_HEAD should be preserved for manual resolution",
    );
    assert.equal(
      readFileSync(join(base, "conflict.txt"), "utf8"),
      beforeContents,
      "reconcile should preserve the conflicted file contents",
    );
    assert.ok(
      notifications.some(
        (n) =>
          n.level === "error" &&
          n.msg.includes("manual conflict resolution is preserved"),
      ),
      "should notify that auto-mode paused and preserved manual work",
    );
  } finally {
    cleanup(base);
  }
});
// NOTE: vitest does not allow registering afterEach() from inside a running
// test; cleanup runs in `finally` instead.
test("verifyExpectedArtifact complete-milestone passes with impl files (#1703)", () => {
  const base = makeGitBase();
  try {
    // Create feature branch with implementation files AND milestone summary
    execFileSync("git", ["checkout", "-b", "feat/ms-with-impl"], {
      cwd: base,
      stdio: "ignore",
    });
    mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });
    writeFileSync(
      join(base, ".sf", "milestones", "M001", "M001-SUMMARY.md"),
      "# Milestone Summary\nDone.",
    );
    mkdirSync(join(base, "src"), { recursive: true });
    writeFileSync(join(base, "src", "app.ts"), "console.log('hello');");
    execFileSync("git", ["add", "."], { cwd: base, stdio: "ignore" });
    execFileSync("git", ["commit", "-m", "feat: implementation"], {
      cwd: base,
      stdio: "ignore",
    });
    const result = verifyExpectedArtifact("complete-milestone", "M001", base);
    assert.equal(
      result,
      true,
      "complete-milestone should pass verification with implementation files",
    );
  } finally {
    cleanup(base);
  }
});