refactor(tools): remove sf_ prefix from all remaining tool names

plan_milestone, plan_slice, plan_task, complete_task, complete_slice,
complete_milestone, skip_slice, replan_slice, reassess_roadmap,
validate_milestone, save_requirement, update_requirement, milestone_status

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-10 07:20:56 +02:00
parent e7bd6a76b9
commit 7085ad850d
83 changed files with 270 additions and 272 deletions

View file

@ -151,9 +151,7 @@ export class CompactionOrchestrator {
if (extensionCompaction) {
summary = extensionCompaction.summary;
firstKeptEntryId = extensionCompaction.firstKeptEntryId;
// Extension may omit tokensBefore (returning undefined) when it delegates
// token-counting to the framework — fall back to the pre-compaction total.
tokensBefore = extensionCompaction.tokensBefore ?? preparation.totalTokens;
tokensBefore = extensionCompaction.tokensBefore ?? preparation.tokensBefore;
details = extensionCompaction.details;
} else {
const result = await compact(
@ -399,7 +397,7 @@ export class CompactionOrchestrator {
if (extensionCompaction) {
summary = extensionCompaction.summary;
firstKeptEntryId = extensionCompaction.firstKeptEntryId;
tokensBefore = extensionCompaction.tokensBefore ?? preparation.totalTokens;
tokensBefore = extensionCompaction.tokensBefore ?? preparation.tokensBefore;
details = extensionCompaction.details;
} else {
const compactResult = await compact(

View file

@ -1,5 +1,5 @@
export const DEFAULT_COMPLETION_NUDGE_AFTER = 10;
export const COMPLETION_NUDGE_TOOL_NAMES = new Set(["sf_slice_complete"]);
export const COMPLETION_NUDGE_TOOL_NAMES = new Set(["complete_slice"]);
const COMPLETION_NUDGE_CUSTOM_TYPE = "sf-completion-nudge";
const LOWERED_TEMPERATURE = 0.2;
const state = {
@ -84,11 +84,11 @@ function nextCompletionNudgeMessage() {
state.reminderSent = true;
state.strongSent = true;
state.lowerTemperatureForNextRequest = true;
return `You've performed ${state.toolCalls} tool calls without calling sf_slice_complete. Stop further investigation unless there is a specific blocker. Call sf_slice_complete now with your summary.`;
return `You've performed ${state.toolCalls} tool calls without calling complete_slice. Stop further investigation unless there is a specific blocker. Call complete_slice now with your summary.`;
}
if (!state.reminderSent && state.toolCalls >= firstThreshold) {
state.reminderSent = true;
return `You've performed ${state.toolCalls} tool calls of investigation. Per the slice plan you should now call sf_slice_complete with your summary. If you genuinely need more context, say so explicitly; otherwise call the tool now.`;
return `You've performed ${state.toolCalls} tool calls of investigation. Per the slice plan you should now call complete_slice with your summary. If you genuinely need more context, say so explicitly; otherwise call the tool now.`;
}
return null;
}

View file

@ -232,7 +232,7 @@ function findRoadmapSliceCountContradiction(roadmapContent, actualSliceCount) {
return null;
}
export function formatTaskCompleteFailurePrompt(reason) {
return `sf_task_complete failed: ${reason}. Try the call again, or investigate the write path.`;
return `complete_task failed: ${reason}. Try the call again, or investigate the write path.`;
}
function prependTaskCompleteFailurePrompt(session, unitId, prompt) {
const reason = session?.pendingTaskCompleteFailures?.get(unitId);

View file

@ -1203,7 +1203,7 @@ export async function postUnitPreVerification(pctx, opts) {
} else if (!triggerArtifactVerified) {
const taskCompleteFailure = taskCompleteFailureForCurrentUnit(s);
if (taskCompleteFailure) {
const retryMessage = `sf_task_complete failed: ${taskCompleteFailure}. Try the call again, or investigate the write path.`;
const retryMessage = `complete_task failed: ${taskCompleteFailure}. Try the call again, or investigate the write path.`;
s.pendingTaskCompleteFailures.set(
s.currentUnit.id,
taskCompleteFailure,

View file

@ -152,7 +152,7 @@ function buildCompleteSliceControlBlock(mid, sid, base) {
"|---|---|---|---|---|---|",
...rows,
"",
"If every task row is `done`, `complete`, or `skipped`, verify the slice-level contract once and call `sf_slice_complete`. Do not reopen planning, do not re-run completed task work, and do not assume a missing roadmap checkbox means the tasks are incomplete.",
"If every task row is `done`, `complete`, or `skipped`, verify the slice-level contract once and call `complete_slice`. Do not reopen planning, do not re-run completed task work, and do not assume a missing roadmap checkbox means the tasks are incomplete.",
"If any task row is still pending or blocked, stop and report the exact task IDs instead of synthesizing new work.",
].join("\n");
} catch (err) {
@ -1809,7 +1809,7 @@ export async function buildExecuteTaskPrompt(
}
})();
// SF ADR-011 P2: when the feature is enabled, teach the executor that it can
// surface non-obvious choices via the `escalation` field on sf_task_complete
// surface non-obvious choices via the `escalation` field on complete_task
// rather than silently picking. Autonomous mode auto-accepts the recommendation
// (see phases.escalation_auto_accept), so this is low-cost overhead — but
// it produces an audit trail and a hard constraint for downstream tasks.
@ -1818,7 +1818,7 @@ export async function buildExecuteTaskPrompt(
const escalationGuidance =
prefs?.preferences?.phases?.mid_execution_escalation === true
? [
"**Surfacing non-obvious choices (optional).** If you hit a decision with material tradeoffs that downstream tasks should respect (e.g. data-loss vs. block-progress, two valid library choices with different long-term cost), include an `escalation` payload in your `sf_task_complete` call:",
"**Surfacing non-obvious choices (optional).** If you hit a decision with material tradeoffs that downstream tasks should respect (e.g. data-loss vs. block-progress, two valid library choices with different long-term cost), include an `escalation` payload in your `complete_task` call:",
"",
"```json",
'"escalation": {',
@ -2031,7 +2031,7 @@ export async function buildCompleteSlicePrompt(
// Gates owned by complete-slice (e.g. Q8). Pull from the DB so the
// prompt only prompts for gates the plan actually seeded. The tool
// handler closes each gate based on the SUMMARY.md section content
// after the assistant calls sf_slice_complete.
// after the assistant calls complete_slice.
const csPending = getPendingGatesForTurn(mid, sid, "complete-slice");
// coverage check: every pending row must be owned by complete-slice.
// requireAll:false because a slice may have already closed some gates.

View file

@ -359,7 +359,7 @@ export function verifyExpectedArtifact(unitType, unitId, base) {
} else if (!isDbAvailable()) {
// LEGACY: Pre-migration fallback for projects without DB.
// Require a CHECKED checkbox — a bare heading or unchecked checkbox
// does not prove sf_task_complete ran. Summary file on disk alone
// does not prove complete_task ran. Summary file on disk alone
// is not sufficient evidence (could be a rogue write) (#3607).
const planAbs = resolveSliceFile(base, mid, sid, "PLAN");
if (planAbs && existsSync(planAbs)) {

View file

@ -699,7 +699,7 @@ export async function bootstrapAutoSession(
bootstrapContext,
"Start the roadmap planning session now: build project knowledge, run the planning meeting, and persist artifacts.",
"Do not stop after reflection. At minimum write CONTEXT-DRAFT with evidence and open questions.",
"If confidence is high enough, write CONTEXT and call sf_plan_milestone so autonomous mode can continue.",
"If confidence is high enough, write CONTEXT and call plan_milestone so autonomous mode can continue.",
].join("\n"),
),
});
@ -742,7 +742,7 @@ export async function bootstrapAutoSession(
bootstrapContext,
"Reuse this milestone ID. Do not create a new milestone for the same bootstrap work.",
"Run the roadmap planning session now and persist CONTEXT or CONTEXT-DRAFT at minimum.",
"If confidence is high enough, write CONTEXT and call sf_plan_milestone so autonomous mode can continue.",
"If confidence is high enough, write CONTEXT and call plan_milestone so autonomous mode can continue.",
].join("\n"),
),
});
@ -1008,7 +1008,7 @@ export async function bootstrapAutoSession(
}
// Gate: abort bootstrap if the DB file exists but the provider is
// still unavailable after both open attempts above. Without this,
// autonomous mode starts but every sf_task_complete / sf_slice_complete
// autonomous mode starts but every complete_task / complete_slice
// call returns "db_unavailable", triggering artifact-retry which
// re-dispatches the same task — producing an infinite loop (#2419).
if (existsSync(sfDbPath) && !isDbAvailable()) {

View file

@ -122,7 +122,7 @@ const DETERMINISTIC_POLICY_ERROR_RE =
* regex explicit substrings keep the list auditable.
*/
export const DETERMINISTIC_POLICY_ERROR_STRINGS = [
// sf_summary_save write-gate: CONTEXT artifact blocked pending depth verification (#4973).
// save_summary write-gate: CONTEXT artifact blocked pending depth verification (#4973).
"context write blocked",
"CONTEXT without depth verification",
// Raw write tool gate (#4973): shouldBlockContextWrite emits this for direct

View file

@ -51,7 +51,7 @@ function isInfraVerificationFailure(stderr) {
* Post-unit guard for `validate-milestone` units (#4094).
*
* When validate-milestone writes verdict=needs-remediation, the agent is
* expected to also call sf_reassess_roadmap in the same turn to add
* expected to also call reassess_roadmap in the same turn to add
* remediation slices. If they don't, the state machine re-derives
* `phase: validating-milestone` indefinitely (all slices still complete +
* verdict still needs-remediation), wasting ~3 dispatches before the stuck
@ -131,7 +131,7 @@ async function runValidateMilestonePostCheck(vctx, pauseAuto) {
);
process.stderr.write(
`validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
`The agent must call sf_reassess_roadmap to add remediation slices before re-validation.\n`,
`The agent must call reassess_roadmap to add remediation slices before re-validation.\n`,
);
await persistMilestoneValidationGate(
"manual-attention",

View file

@ -495,7 +495,7 @@ export function getCurrentUnitModelFailures() {
*
* Purpose: prevent a research unit that already produced its durable artifact
* from drifting into planner tools before the orchestrator dispatches planning.
* Consumer: register-hooks tool_result handling for sf_summary_save.
* Consumer: register-hooks tool_result handling for save_summary.
*/
export function markResearchTerminalTransition() {
getAutoSession().researchTerminalTransition = true;
@ -517,17 +517,17 @@ export function markToolStart(toolCallId, toolName) {
export function markToolEnd(toolCallId) {
_markToolEnd(toolCallId);
}
const TASK_COMPLETE_TOOL_NAMES = new Set(["sf_task_complete"]);
const TASK_COMPLETE_TOOL_NAMES = new Set(["complete_task"]);
function normalizeTaskCompleteFailure(errorMsg) {
return errorMsg
.replace(/^Error completing task:\s*/i, "")
.replace(/^sf_task_complete failed:\s*/i, "")
.replace(/^complete_task failed:\s*/i, "")
.trim();
}
/**
* Record a tool invocation error on the current session (#2883).
* Called from tool_execution_end when a SF tool fails with isError.
* Malformed/truncated JSON errors still pause autonomous mode. sf_task_complete
* Malformed/truncated JSON errors still pause autonomous mode. complete_task
* execution errors are tracked separately so the same task can retry in-flow.
*/
export function recordToolInvocationError(toolName, errorMsg) {
@ -804,7 +804,7 @@ export async function stopAuto(ctx, pi, reason) {
: { notify: () => {} };
const resolver = buildResolver();
// Check if the milestone is complete. DB status is the authoritative
// signal — only a successful sf_complete_milestone call flips it to
// signal — only a successful complete_milestone call flips it to
// "complete" (tools/complete-milestone.ts). SUMMARY file presence is
// NOT sufficient: a blocker placeholder stub or a partial write can
// leave a file behind without the milestone actually being done,

View file

@ -12,7 +12,7 @@ import { summarizeLogs } from "../workflow-logger.js";
*/
const ENOENT_PATH_RE = /ENOENT[^']*'([^']+)'/;
const TRANSIENT_TASK_COMPLETE_RE =
/\b(?:sf_task_complete failed|Error completing task:).*SUMMARY\.md write failed/i;
/\b(?:complete_task failed|Error completing task:).*SUMMARY\.md write failed/i;
const MAX_STUCK_REASON_CHARS = 260;
function isTransientTaskCompleteError(entry) {
return (

View file

@ -239,7 +239,7 @@ export class AutoSession {
/** Last turn-level git action status captured during finalize. */
lastGitActionStatus = null;
/**
* Last sf_task_complete execution error for the current turn.
* Last complete_task execution error for the current turn.
* Unlike malformed tool invocation errors, these are normal tool execution
* failures (for example a transient SUMMARY.md write failure) and should be
* retried in-flow instead of pausing autonomous mode.
@ -306,7 +306,7 @@ export class AutoSession {
// ── Research unit terminal transition ──────────────────────────────────
/**
* Set to true when a research unit (research-slice/research-milestone)
* successfully saves its RESEARCH artifact via sf_summary_save.
* successfully saves its RESEARCH artifact via save_summary.
* Subsequent planning tool calls are blocked to prevent post-artifact drift
* where the agent continues into milestone/slice/task planning.
*/

View file

@ -376,9 +376,9 @@ export function buildAutonomousSolverPromptBlock(state) {
"- Invariants: rules that must remain true across iterations.",
"- Assumptions: uncertain facts you relied on and how to falsify them later.",
"",
"If you are executing an `execute-task` unit and the task is finished, `sf_task_complete` remains mandatory; `checkpoint` does not replace it.",
"If you are executing an `execute-task` unit and the task is finished, `complete_task` remains mandatory; `checkpoint` does not replace it.",
"If you need another iteration, leave exact remaining items in the checkpoint rather than ending with vague prose.",
"Your final autonomous action should be the checkpoint tool call unless a required completion tool such as sf_task_complete must be called immediately before it.",
"Your final autonomous action should be the checkpoint tool call unless a required completion tool such as complete_task must be called immediately before it.",
);
return lines.join("\n");
}

View file

@ -29,7 +29,7 @@ import {
import { logError } from "../workflow-logger.js";
import { ensureDbOpen } from "./dynamic-tools.js";
export function registerDbTools(pi) {
// ─── sf_decision_save ─────────────────────────────────────────────────
// ─── save_decision ─────────────────────────────────────────────────
const decisionSaveExecute = async (
_toolCallId,
params,
@ -69,8 +69,8 @@ export function registerDbTools(pi) {
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `sf_decision_save tool failed: ${msg}`, {
tool: "sf_decision_save",
logError("tool", `save_decision tool failed: ${msg}`, {
tool: "save_decision",
error: String(err),
});
return {
@ -80,7 +80,7 @@ export function registerDbTools(pi) {
}
};
const decisionSaveTool = {
name: "sf_decision_save",
name: "save_decision",
label: "Save Decision",
description:
"Record a project decision to the SF database and regenerate DECISIONS.md. " +
@ -88,7 +88,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Record a project decision to the SF database (auto-assigns ID, regenerates DECISIONS.md)",
promptGuidelines: [
"Use sf_decision_save when recording an architectural, pattern, library, or observability decision.",
"Use save_decision when recording an architectural, pattern, library, or observability decision.",
"Decision IDs are auto-assigned (D001, D002, ...) — never guess or provide an ID.",
"All fields except revisable, when_context, and made_by are required.",
"The tool writes to the DB and regenerates .sf/DECISIONS.md automatically.",
@ -128,7 +128,7 @@ export function registerDbTools(pi) {
}),
execute: decisionSaveExecute,
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("sf_decision_save "));
let text = theme.fg("toolTitle", theme.bold("save_decision "));
if (args.scope) text += theme.fg("accent", `[${args.scope}] `);
if (args.decision) text += theme.fg("muted", args.decision);
if (args.choice) text += theme.fg("dim", `${args.choice}`);
@ -149,7 +149,7 @@ export function registerDbTools(pi) {
},
};
pi.registerTool(decisionSaveTool);
// ─── sf_requirement_update ────────────────────────────────────────────
// ─── update_requirement ────────────────────────────────────────────
const requirementUpdateExecute = async (
_toolCallId,
params,
@ -193,8 +193,8 @@ export function registerDbTools(pi) {
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `sf_requirement_update tool failed: ${msg}`, {
tool: "sf_requirement_update",
logError("tool", `update_requirement tool failed: ${msg}`, {
tool: "update_requirement",
error: String(err),
});
return {
@ -208,7 +208,7 @@ export function registerDbTools(pi) {
}
};
const requirementUpdateTool = {
name: "sf_requirement_update",
name: "update_requirement",
label: "Update Requirement",
description:
"Update an existing requirement in the SF database and regenerate REQUIREMENTS.md. " +
@ -216,7 +216,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Update an existing SF requirement by ID (regenerates REQUIREMENTS.md)",
promptGuidelines: [
"Use sf_requirement_update to change status, validation, notes, or other fields on an existing requirement.",
"Use update_requirement to change status, validation, notes, or other fields on an existing requirement.",
"The id parameter is required — it must be an existing RXXX identifier.",
"All other fields are optional — only provided fields are updated.",
"The tool verifies the requirement exists before updating.",
@ -244,7 +244,7 @@ export function registerDbTools(pi) {
}),
execute: requirementUpdateExecute,
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("sf_requirement_update "));
let text = theme.fg("toolTitle", theme.bold("update_requirement "));
if (args.id) text += theme.fg("accent", args.id);
const fields = ["status", "validation", "notes", "description"].filter(
(f) => args[f],
@ -267,7 +267,7 @@ export function registerDbTools(pi) {
},
};
pi.registerTool(requirementUpdateTool);
// ─── sf_requirement_save ─────────────────────────────────────────────
// ─── save_requirement ─────────────────────────────────────────────
const requirementSaveExecute = async (
_toolCallId,
params,
@ -312,8 +312,8 @@ export function registerDbTools(pi) {
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `sf_requirement_save tool failed: ${msg}`, {
tool: "sf_requirement_save",
logError("tool", `save_requirement tool failed: ${msg}`, {
tool: "save_requirement",
error: String(err),
});
return {
@ -323,7 +323,7 @@ export function registerDbTools(pi) {
}
};
const requirementSaveTool = {
name: "sf_requirement_save",
name: "save_requirement",
label: "Save Requirement",
description:
"Record a new requirement to the SF database and regenerate REQUIREMENTS.md. " +
@ -331,7 +331,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Record a new SF requirement to the database (auto-assigns ID, regenerates REQUIREMENTS.md)",
promptGuidelines: [
"Use sf_requirement_save when recording a new functional, non-functional, or operational requirement.",
"Use save_requirement when recording a new functional, non-functional, or operational requirement.",
"Requirement IDs are auto-assigned (R001, R002, ...) — never guess or provide an ID.",
"class, description, why, and source are required. All other fields are optional.",
"The tool writes to the DB and regenerates .sf/REQUIREMENTS.md automatically.",
@ -365,7 +365,7 @@ export function registerDbTools(pi) {
}),
execute: requirementSaveExecute,
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("sf_requirement_save "));
let text = theme.fg("toolTitle", theme.bold("save_requirement "));
if (args.class) text += theme.fg("accent", `[${args.class}] `);
if (args.description) text += theme.fg("muted", args.description);
return new Text(text, 0, 0);
@ -385,7 +385,7 @@ export function registerDbTools(pi) {
},
};
pi.registerTool(requirementSaveTool);
// ─── sf_summary_save ──────────────────────────────────────────────────
// ─── save_summary ──────────────────────────────────────────────────
const summarySaveExecute = async (
_toolCallId,
params,
@ -396,7 +396,7 @@ export function registerDbTools(pi) {
return executeSummarySave(params, process.cwd());
};
const summarySaveTool = {
name: "sf_summary_save",
name: "save_summary",
label: "Save Summary",
description:
"Save a summary, research, context, or assessment artifact to the SF database and write it to disk. " +
@ -404,7 +404,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Save a SF artifact (summary/research/context/assessment) to DB and disk",
promptGuidelines: [
"Use sf_summary_save to persist structured artifacts (SUMMARY, RESEARCH, CONTEXT, ASSESSMENT, CONTEXT-DRAFT).",
"Use save_summary to persist structured artifacts (SUMMARY, RESEARCH, CONTEXT, ASSESSMENT, CONTEXT-DRAFT).",
"milestone_id is required. slice_id and task_id are optional — they determine the file path.",
"The tool computes the relative path automatically: milestones/M001/M001-SUMMARY.md, milestones/M001/slices/S01/S01-SUMMARY.md, etc.",
"artifact_type must be one of: SUMMARY, RESEARCH, CONTEXT, ASSESSMENT, CONTEXT-DRAFT.",
@ -428,7 +428,7 @@ export function registerDbTools(pi) {
}),
execute: summarySaveExecute,
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("sf_summary_save "));
let text = theme.fg("toolTitle", theme.bold("save_summary "));
if (args.artifact_type) text += theme.fg("accent", args.artifact_type);
const path = [args.milestone_id, args.slice_id, args.task_id]
.filter(Boolean)
@ -507,7 +507,7 @@ export function registerDbTools(pi) {
};
/**
* Insert a minimal DB row for a milestone ID so it's visible to the state
* machine. Uses INSERT OR IGNORE safe to call even if sf_plan_milestone
* machine. Uses INSERT OR IGNORE safe to call even if plan_milestone
* later writes the full row. Silently skips if the DB isn't available yet
* (pre-migration).
*/
@ -1030,7 +1030,7 @@ export function registerDbTools(pi) {
);
},
});
// ─── sf_plan_milestone ────────────────────────────────────────────────
// ─── plan_milestone ────────────────────────────────────────────────
const planMilestoneExecute = async (
_toolCallId,
params,
@ -1041,14 +1041,14 @@ export function registerDbTools(pi) {
return executePlanMilestone(params, process.cwd());
};
const planMilestoneTool = {
name: "sf_plan_milestone",
name: "plan_milestone",
label: "Plan Milestone",
description:
"Write milestone planning state to the SF database, render ROADMAP.md from DB, and clear caches after a successful render.",
promptSnippet:
"Plan a milestone via DB write + roadmap render + cache invalidation",
promptGuidelines: [
"Use sf_plan_milestone for milestone planning instead of writing ROADMAP.md directly.",
"Use plan_milestone for milestone planning instead of writing ROADMAP.md directly.",
"Keep parameters flat and provide the full milestone planning payload. Use either explicit slices or templateId-based scaffolding for common feat/fix/refactor patterns.",
"Use productResearch for product/category/competitor research; do not hide those findings inside visionMeeting.researcher.",
"The tool validates input, writes milestone and slice planning data transactionally, renders ROADMAP.md from DB, and clears both state and parse caches after success.",
@ -1329,7 +1329,7 @@ export function registerDbTools(pi) {
const milestoneId = args?.milestoneId ? String(args.milestoneId) : "";
const title = args?.title ? String(args.title) : "";
const slices = Array.isArray(args?.slices) ? args.slices : [];
let text = theme.fg("toolTitle", theme.bold("sf_plan_milestone"));
let text = theme.fg("toolTitle", theme.bold("plan_milestone"));
if (milestoneId || title) {
text += theme.fg(
"muted",
@ -1382,7 +1382,7 @@ export function registerDbTools(pi) {
},
};
pi.registerTool(planMilestoneTool);
// ─── sf_plan_slice ────────────────────────────────────────────────────
// ─── plan_slice ────────────────────────────────────────────────────
const planSliceExecute = async (
_toolCallId,
params,
@ -1393,14 +1393,14 @@ export function registerDbTools(pi) {
return executePlanSlice(params, process.cwd());
};
const planSliceTool = {
name: "sf_plan_slice",
name: "plan_slice",
label: "Plan Slice",
description:
"Write slice planning state to the SF database, render S##-PLAN.md plus task PLAN artifacts from DB, and clear caches after a successful render.",
promptSnippet:
"Plan a slice via DB write + PLAN render + cache invalidation",
promptGuidelines: [
"Use sf_plan_slice for slice planning instead of writing S##-PLAN.md or task PLAN files directly.",
"Use plan_slice for slice planning instead of writing S##-PLAN.md or task PLAN files directly.",
"Keep parameters flat and provide the full slice planning payload, including tasks.",
"The tool validates input, requires an existing parent slice, writes slice/task planning data, renders PLAN.md and task plan files from DB, and clears both state and parse caches after success.",
],
@ -1590,7 +1590,7 @@ export function registerDbTools(pi) {
execute: planSliceExecute,
};
pi.registerTool(planSliceTool);
// ─── sf_plan_task ─────────────────────────────────────────────────────
// ─── plan_task ─────────────────────────────────────────────────────
const planTaskExecute = async (
_toolCallId,
params,
@ -1642,7 +1642,7 @@ export function registerDbTools(pi) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `plan_task tool failed: ${msg}`, {
tool: "sf_plan_task",
tool: "plan_task",
error: String(err),
});
return {
@ -1652,14 +1652,14 @@ export function registerDbTools(pi) {
}
};
const planTaskTool = {
name: "sf_plan_task",
name: "plan_task",
label: "Plan Task",
description:
"Write task planning state to the SF database, render tasks/T##-PLAN.md from DB, and clear caches after a successful render.",
promptSnippet:
"Plan a task via DB write + task PLAN render + cache invalidation",
promptGuidelines: [
"Use sf_plan_task for task planning instead of writing tasks/T##-PLAN.md directly.",
"Use plan_task for task planning instead of writing tasks/T##-PLAN.md directly.",
"Keep parameters flat and provide the full task planning payload.",
"The tool validates input, requires an existing parent slice, writes task planning data, renders the task PLAN file from DB, and clears both state and parse caches after success.",
],
@ -1688,7 +1688,7 @@ export function registerDbTools(pi) {
execute: planTaskExecute,
};
pi.registerTool(planTaskTool);
// ─── sf_task_complete ─────────────────────────────────────────────────
// ─── complete_task ─────────────────────────────────────────────────
const taskCompleteExecute = async (
_toolCallId,
params,
@ -1699,7 +1699,7 @@ export function registerDbTools(pi) {
return executeTaskComplete(params, process.cwd());
};
const taskCompleteTool = {
name: "sf_task_complete",
name: "complete_task",
label: "Complete Task",
description:
"Record a completed task to the SF database, render a SUMMARY.md to disk, and toggle the plan checkbox — all in one atomic operation. " +
@ -1707,7 +1707,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Complete a SF task (DB write + summary render + checkbox toggle)",
promptGuidelines: [
"Use sf_task_complete when a task is finished and needs to be recorded.",
"Use complete_task when a task is finished and needs to be recorded.",
"All string fields are required. verificationEvidence is an array of objects with command, exitCode, verdict, durationMs.",
"The tool validates required fields and returns an error message if any are missing.",
"On success, returns the summaryPath where the SUMMARY.md was written.",
@ -1825,7 +1825,7 @@ export function registerDbTools(pi) {
execute: taskCompleteExecute,
};
pi.registerTool(taskCompleteTool);
// ─── sf_slice_complete ────────────────────────────────────────────────
// ─── complete_slice ────────────────────────────────────────────────
const sliceCompleteExecute = async (
_toolCallId,
params,
@ -1836,7 +1836,7 @@ export function registerDbTools(pi) {
return executeSliceComplete(params, process.cwd());
};
const sliceCompleteTool = {
name: "sf_slice_complete",
name: "complete_slice",
label: "Complete Slice",
description:
"Record a completed slice to the SF database, render SUMMARY.md + UAT.md to disk, and toggle the roadmap checkbox — all in one atomic operation. " +
@ -1844,7 +1844,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Complete a SF slice (DB write + summary/UAT render + roadmap checkbox toggle)",
promptGuidelines: [
"Use sf_slice_complete when all tasks in a slice are finished and the slice needs to be recorded.",
"Use complete_slice when all tasks in a slice are finished and the slice needs to be recorded.",
"All tasks in the slice must have status 'complete' — the handler validates this before proceeding.",
"On success, returns summaryPath and uatPath where the files were written.",
"Idempotent — calling with the same params twice will not crash.",
@ -1986,7 +1986,7 @@ export function registerDbTools(pi) {
execute: sliceCompleteExecute,
};
pi.registerTool(sliceCompleteTool);
// ─── sf_skip_slice (#3477 / #3487) ───────────────────────────────────
// ─── skip_slice (#3477 / #3487) ───────────────────────────────────
const skipSliceExecute = async (
_toolCallId,
params,
@ -2060,7 +2060,7 @@ export function registerDbTools(pi) {
await rebuildState(basePath);
} catch (err) {
logError("tool", `skip_slice rebuildState failed: ${err.message}`, {
tool: "sf_skip_slice",
tool: "skip_slice",
});
}
return {
@ -2080,7 +2080,7 @@ export function registerDbTools(pi) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `skip_slice tool failed: ${msg}`, {
tool: "sf_skip_slice",
tool: "skip_slice",
error: String(err),
});
return {
@ -2090,7 +2090,7 @@ export function registerDbTools(pi) {
}
};
pi.registerTool({
name: "sf_skip_slice",
name: "skip_slice",
label: "Skip Slice",
description:
"Mark a slice as skipped so autonomous mode advances past it without executing. " +
@ -2098,7 +2098,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Skip a SF slice (mark as skipped, autonomous mode will advance past it)",
promptGuidelines: [
"Use sf_skip_slice when a slice should be bypassed — descoped, superseded, or no longer relevant.",
"Use skip_slice when a slice should be bypassed — descoped, superseded, or no longer relevant.",
"Cannot skip a slice that is already complete.",
"Skipped slices satisfy downstream dependencies just like completed slices.",
],
@ -2111,7 +2111,7 @@ export function registerDbTools(pi) {
}),
execute: skipSliceExecute,
});
// ─── sf_complete_milestone ────────────────────────────────────────────
// ─── complete_milestone ────────────────────────────────────────────
const milestoneCompleteExecute = async (
_toolCallId,
params,
@ -2122,14 +2122,14 @@ export function registerDbTools(pi) {
return executeCompleteMilestone(params, process.cwd());
};
const milestoneCompleteTool = {
name: "sf_complete_milestone",
name: "complete_milestone",
label: "Complete Milestone",
description:
"Record a completed milestone to the SF database, render MILESTONE-SUMMARY.md to disk — all in one atomic operation. " +
"Validates all slices are complete before proceeding.",
promptSnippet: "Complete a SF milestone (DB write + summary render)",
promptGuidelines: [
"Use sf_complete_milestone when all slices in a milestone are finished and the milestone needs to be recorded.",
"Use complete_milestone when all slices in a milestone are finished and the milestone needs to be recorded.",
"All slices in the milestone must have status 'complete' — the handler validates this before proceeding.",
"verificationPassed must be explicitly set to true — the handler rejects completion if verification did not pass.",
"On success, returns summaryPath where the MILESTONE-SUMMARY.md was written.",
@ -2230,7 +2230,7 @@ export function registerDbTools(pi) {
execute: milestoneCompleteExecute,
};
pi.registerTool(milestoneCompleteTool);
// ─── sf_validate_milestone ────────────────────────────────────────────
// ─── validate_milestone ────────────────────────────────────────────
const milestoneValidateExecute = async (
_toolCallId,
params,
@ -2241,16 +2241,16 @@ export function registerDbTools(pi) {
return executeValidateMilestone(params, process.cwd());
};
const milestoneValidateTool = {
name: "sf_validate_milestone",
name: "validate_milestone",
label: "Validate Milestone",
description:
"Validate a milestone before completion — persist validation results to the DB, render VALIDATION.md to disk. " +
"Records verdict (pass/needs-attention/needs-remediation) and rationale.",
promptSnippet: "Validate a SF milestone (DB write + VALIDATION.md render)",
promptGuidelines: [
"Use sf_validate_milestone when all slices are done and the milestone needs validation before completion.",
"Use validate_milestone when all slices are done and the milestone needs validation before completion.",
"Parameters: milestoneId, verdict, remediationRound, successCriteriaChecklist, sliceDeliveryAudit, crossSliceIntegration, requirementCoverage, verificationClasses (optional), verdictRationale, remediationPlan (optional).",
"If verdict is 'needs-remediation', also provide remediationPlan and use sf_reassess_roadmap to add remediation slices to the roadmap.",
"If verdict is 'needs-remediation', also provide remediationPlan and use reassess_roadmap to add remediation slices to the roadmap.",
"On success, returns validationPath where VALIDATION.md was written.",
],
parameters: Type.Object({
@ -2294,7 +2294,7 @@ export function registerDbTools(pi) {
execute: milestoneValidateExecute,
};
pi.registerTool(milestoneValidateTool);
// ─── sf_replan_slice ──────────────────────────────────────────────────
// ─── replan_slice ──────────────────────────────────────────────────
const replanSliceExecute = async (
_toolCallId,
params,
@ -2305,7 +2305,7 @@ export function registerDbTools(pi) {
return executeReplanSlice(params, process.cwd());
};
const replanSliceTool = {
name: "sf_replan_slice",
name: "replan_slice",
label: "Replan Slice",
description:
"Replan a slice after a blocker is discovered. Structurally enforces preservation of completed tasks — " +
@ -2314,7 +2314,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Replan a SF slice with structural enforcement of completed tasks",
promptGuidelines: [
"Use sf_replan_slice when a blocker is discovered and the slice plan needs rewriting.",
"Use replan_slice when a blocker is discovered and the slice plan needs rewriting.",
"The tool structurally enforces that completed tasks cannot be updated or removed — violations return specific error payloads naming the blocked task ID.",
"Parameters: milestoneId, sliceId, blockerTaskId, blockerDescription, whatChanged, optional slice-level planning/ceremony updates, updatedTasks (array), removedTaskIds (array).",
"updatedTasks items: taskId, title, description, estimate, files, verify, inputs, expectedOutput.",
@ -2440,7 +2440,7 @@ export function registerDbTools(pi) {
execute: replanSliceExecute,
};
pi.registerTool(replanSliceTool);
// ─── sf_reassess_roadmap ──────────────────────────────────────────────
// ─── reassess_roadmap ──────────────────────────────────────────────
const reassessRoadmapExecute = async (
_toolCallId,
params,
@ -2451,7 +2451,7 @@ export function registerDbTools(pi) {
return executeReassessRoadmap(params, process.cwd());
};
const reassessRoadmapTool = {
name: "sf_reassess_roadmap",
name: "reassess_roadmap",
label: "Reassess Roadmap",
description:
"Reassess the milestone roadmap after a slice completes. Structurally enforces preservation of completed slices — " +
@ -2460,7 +2460,7 @@ export function registerDbTools(pi) {
promptSnippet:
"Reassess a SF roadmap with structural enforcement of completed slices",
promptGuidelines: [
"Use sf_reassess_roadmap after a slice completes to reassess the roadmap.",
"Use reassess_roadmap after a slice completes to reassess the roadmap.",
"The tool structurally enforces that completed slices cannot be modified or removed — violations return specific error payloads naming the blocked slice ID.",
"Parameters: milestoneId, completedSliceId, verdict, assessment, sliceChanges (object with modified, added, removed arrays).",
"sliceChanges.modified items: sliceId, title, risk (optional), depends (optional), demo (optional).",

View file

@ -3,7 +3,7 @@ import { queryJournal } from "../journal.js";
import { logWarning } from "../workflow-logger.js";
export function registerJournalTools(pi) {
pi.registerTool({
name: "sf_journal_query",
name: "query_journal",
label: "Query Journal",
description:
"Query the structured event journal for autonomous mode iterations. " +
@ -78,7 +78,7 @@ export function registerJournalTools(pi) {
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logWarning("tool", `sf_journal_query tool failed: ${msg}`);
logWarning("tool", `query_journal tool failed: ${msg}`);
return {
content: [{ type: "text", text: `Error querying journal: ${msg}` }],
details: { operation: "journal_query", error: msg },

View file

@ -1,13 +1,13 @@
// SF — Product Audit tool registration
//
// Exposes `sf_product_audit` to the LLM. The tool persists a structured
// Exposes `audit_product` to the LLM. The tool persists a structured
// product-completeness audit (verdict + gaps) to
// `.sf/active/{milestoneId}/PRODUCT-AUDIT.{json,md}`.
import { Type } from "@sinclair/typebox";
import { handleProductAudit } from "../tools/product-audit-tool.js";
export function registerProductAuditTool(pi) {
pi.registerTool({
name: "sf_product_audit",
name: "audit_product",
label: "Product Audit",
description:
"Persist a milestone-end product-completeness audit. Compares declared " +
@ -71,7 +71,7 @@ export function registerProductAuditTool(pi) {
text: `Error: ${result.error}`,
},
],
details: { operation: "sf_product_audit", error: result.error },
details: { operation: "audit_product", error: result.error },
isError: true,
};
}
@ -85,7 +85,7 @@ export function registerProductAuditTool(pi) {
`Wrote ${result.markdownPath} and ${result.jsonPath}.`,
},
],
details: { operation: "sf_product_audit", ...result },
details: { operation: "audit_product", ...result },
};
},
});

View file

@ -5,7 +5,7 @@ import { executeMilestoneStatus } from "../tools/workflow-tool-executors.js";
import { ensureDbOpen } from "./dynamic-tools.js";
export function registerQueryTools(pi) {
pi.registerTool({
name: "sf_milestone_status",
name: "milestone_status",
label: "Milestone Status",
description:
"Read the current status of a milestone and all its slices from the SF database. " +

View file

@ -760,9 +760,9 @@ export function registerHooks(pi, ecosystemHandlers = []) {
}
// ── Research unit terminal transition enforcement ─────────────────────
// After a research unit (research-slice/research-milestone) successfully
// saves its RESEARCH artifact via sf_summary_save, the tool returns
// saves its RESEARCH artifact via save_summary, the tool returns
// terminal_transition: true. We track this and block subsequent planning
// tool calls to prevent post-artifact drift (e.g. calling sf_plan_milestone
// tool calls to prevent post-artifact drift (e.g. calling plan_milestone
// after research is complete). This addresses sf-moocx6m5-ij630a.
if (isAutoActive()) {
const dash = getAutoDashboardData();
@ -774,12 +774,12 @@ export function registerHooks(pi, ecosystemHandlers = []) {
) {
if (hasResearchTerminalTransition()) {
const planningTools = new Set([
"sf_plan_milestone",
"sf_plan_slice",
"sf_plan_task",
"plan_milestone",
"plan_slice",
"plan_task",
"new_milestone_id",
"sf_replan_slice",
"sf_reassess_roadmap",
"replan_slice",
"reassess_roadmap",
]);
if (planningTools.has(event.toolName)) {
return {
@ -1054,7 +1054,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
});
if (isAutoActive()) {
if (
event.toolName === "sf_summary_save" &&
event.toolName === "save_summary" &&
event.details &&
typeof event.details === "object" &&
event.details.terminal_transition === true &&

View file

@ -1,5 +1,5 @@
/**
* Input sanitization for sf_complete_milestone parameters.
* Input sanitization for complete_milestone parameters.
*
* The Claude SDK deserializes tool-call JSON before the handler runs.
* When an LLM (especially smaller models like haiku) generates large markdown

View file

@ -1,6 +1,6 @@
// SF Bootstrap — session_todo tool registration.
//
// Purpose: expose sf_session_todo as a native agent tool so the agent can
// Purpose: expose manage_todos as a native agent tool so the agent can
// maintain a durable per-session task checklist that survives context
// compaction (items persist in .sf/session_todo.json).
//
@ -13,7 +13,7 @@ import {
} from "../tools/session-todo-tool.js";
/**
* Register the sf_session_todo tool with the pi extension API.
* Register the manage_todos tool with the pi extension API.
*
* Purpose: give the agent a file-backed checklist tool so in-session tasks
* survive context compaction without relying on context-window memory.
@ -21,7 +21,7 @@ import {
*/
export function registerSessionTodoTool(pi) {
pi.registerTool({
name: "sf_session_todo",
name: "manage_todos",
label: "Session Todo",
description:
"Manage a per-session task checklist backed by .sf/session_todo.json. " +

View file

@ -34,7 +34,7 @@ const QUEUE_SAFE_TOOLS = new Set([
// Discussion & planning tools
"ask_user_questions",
"new_milestone_id",
"sf_summary_save",
"save_summary",
// Web research tools used during queue discussion
"search-the-web",
"resolve_library",
@ -411,7 +411,7 @@ export function shouldBlockContextWrite(
};
}
/**
* Check whether a sf_summary_save CONTEXT artifact should be blocked.
* Check whether a save_summary CONTEXT artifact should be blocked.
* Slice-level CONTEXT artifacts are allowed; milestone-level CONTEXT writes
* require the milestone to be depth-verified first.
*/

View file

@ -27,23 +27,23 @@ export const CACHE_MAX = 50;
* sent to the provider stays well under provider limits.
*
* Included tools and why:
* - sf_summary_save: writes CONTEXT.md artifacts (all discuss prompts)
* - sf_decision_save: records decisions (discuss.md output phase)
* - sf_plan_milestone: writes roadmap (discuss.md single/multi milestone)
* - save_summary: writes CONTEXT.md artifacts (all discuss prompts)
* - save_decision: records decisions (discuss.md output phase)
* - plan_milestone: writes roadmap (discuss.md single/multi milestone)
* - new_milestone_id: generates milestone IDs (discuss.md multi-milestone)
* - sf_requirement_update: updates requirements during discuss
* - update_requirement: updates requirements during discuss
*/
export const DISCUSS_TOOLS_ALLOWLIST = [
// Context / summary writing
"sf_summary_save",
"save_summary",
// Decision recording
"sf_decision_save",
"save_decision",
// Milestone planning (needed for discuss.md output phase)
"sf_plan_milestone",
"plan_milestone",
// Milestone ID generation (multi-milestone flow)
"new_milestone_id",
// Requirement updates
"sf_requirement_update",
"update_requirement",
];
/**
* SF tools allowed during research units.
@ -57,7 +57,7 @@ export const DISCUSS_TOOLS_ALLOWLIST = [
* Consumer: guided-flow.ts and auto/run-unit.ts when narrowing SF tools for
* research-milestone and research-slice turns.
*/
export const RESEARCH_TOOLS_ALLOWLIST = ["sf_summary_save", "report_issue"];
export const RESEARCH_TOOLS_ALLOWLIST = ["save_summary", "report_issue"];
/**
* Return the SF tool allowlist for a workflow unit, or null when the full SF
* tool set is appropriate.

View file

@ -526,7 +526,7 @@ export async function updateRequirementInDb(id, updates, basePath) {
// If requirement doesn't exist in DB, seed the entire requirements table
// from REQUIREMENTS.md first (#3346). This handles the standard workflow
// where requirements are authored in markdown during discussion but never
// imported into the database — making sf_requirement_update always fail
// imported into the database — making update_requirement always fail
// with "not_found" at milestone completion.
if (!existing) {
const reqFilePath = resolveSfRootFile(basePath, "REQUIREMENTS");

View file

@ -127,7 +127,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea
- `hard_timeout_minutes`: minutes before the supervisor forces termination (default: 30).
- `solver_max_iterations`: maximum autonomous solver iterations for one unit before pausing (default: `30000`, min: `1`, max: `100000`).
- `solver_eval_on_autonomous_exit`: automatically run and record the built-in solver eval when `/autonomous` exits (default: `true`; set `false` only to disable lifecycle eval evidence).
- `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `sf_slice_complete` (default: 10; set `0` to disable).
- `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `complete_slice` (default: 10; set `0` to disable).
- `runaway_guard_enabled`: enable active-loop diagnosis for long-running units (default: `true`).
- `runaway_tool_call_warning`: unit tool calls before a runaway warning (default: `60`; set `0` to disable this signal).
- `runaway_token_warning`: unit tokens before a runaway warning (default: `1000000`; set `0` to disable this signal).

View file

@ -1114,7 +1114,7 @@ function auditRequirements(content, options = {}) {
if (!options.includeOwnerWarnings) continue;
// #4414: Downgrade to warning. A newly-created requirement has
// primary_owner='' by default until the planning agent wires it to
// a slice via sf_requirement_update. Flagging as error during normal
// a slice via update_requirement. Flagging as error during normal
// planning is noisy — the real failure is when it persists past
// milestone completion, which is covered by other audits.
issues.push({

View file

@ -722,7 +722,7 @@ export async function showHeadlessMilestoneCreation(
createdAt: Date.now(),
});
// Dispatch as discuss-milestone. The LLM writes PROJECT.md, REQUIREMENTS.md,
// and CONTEXT.md, then calls sf_plan_milestone — this is semantically the
// and CONTEXT.md, then calls plan_milestone — this is semantically the
// discuss path, just non-interactive. Using "plan-milestone" here caused
// model/tool routing to skip discuss-flow tool scoping and
// `checkAutoStartAfterDiscuss` guardrails that rely on the

View file

@ -56,7 +56,7 @@ After all findings, provide an **Overall Verdict**:
- `NEEDS-REMEDIATION` — one or more critical/high findings must be addressed before deploy.
- `ADVISORY` — findings are low severity; proceed with awareness.
Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "CHALLENGE"`, and the full challenge report as content.
Call `save_summary` with `milestone_id: {{milestoneId}}`, `artifact_type: "CHALLENGE"`, and the full challenge report as content.
### Report sf-internal observations

View file

@ -16,7 +16,7 @@ All relevant context has been preloaded below — the roadmap, all slice summari
Then:
1. Use the **Milestone Summary** output template from the inlined context above
2. Inspect the inlined or linked **MILESTONE VALIDATION** verdict. If the verdict is not `pass` (including `needs-attention` or `needs-remediation`), record it as a **verification failure** and follow the failure path. Do NOT call `sf_complete_milestone` for a non-pass validation verdict.
2. Inspect the inlined or linked **MILESTONE VALIDATION** verdict. If the verdict is not `pass` (including `needs-attention` or `needs-remediation`), record it as a **verification failure** and follow the failure path. Do NOT call `complete_milestone` for a non-pass validation verdict.
3. {{skillActivation}}
4. **Verify implementation evidence exists.** Use the inlined validation verdict and slice summaries as the primary proof that implementation happened. If a slice summary lists non-`.sf/` key files, accepted verification commands, or committed work, that satisfies code-change verification for already-integrated milestone work. Run only `git status --short` to check for unresolved local changes. Do **not** inspect git history, compute merge bases, or run branch-diff archaeology unless the validation verdict and slice summaries are missing or contradictory. If no slice summary or validation evidence names implementation files, record this as a **verification failure**.
5. Verify each **success criterion** from the milestone definition in `{{roadmapPath}}`. For each criterion, confirm it was met with specific evidence from slice summaries, test results, or observable behavior. Record any criterion that was NOT met as a **verification failure**.
@ -33,14 +33,14 @@ Before completion, classify leftover work into one of two buckets:
If work falls into the second bucket, do not fail the milestone just because it exists. Record it as a follow-up for a parallel track or later milestone. If it falls into the first bucket, the milestone is not complete.
**DB access safety:** Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `sf_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `sf_*` tools — never via direct SQL.
**DB access safety:** Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the registered SF tools — never via direct SQL.
### Verification Gate — STOP if verification failed
**If ANY verification failure was recorded in steps 2, 4, 5, or 6, you MUST follow the failure path below. Do NOT proceed to step 11.**
**Failure path** (verification failed):
- Do NOT call `sf_complete_milestone` — the milestone must not be marked as complete.
- Do NOT call `complete_milestone` — the milestone must not be marked as complete.
- Do NOT update `.sf/PROJECT.md` to reflect completion.
- Do NOT update `.sf/REQUIREMENTS.md` to mark requirements as validated.
- A non-pass validation verdict is a verification failure, even if it is terminal for validation-loop purposes.
@ -49,8 +49,8 @@ If work falls into the second bucket, do not fail the milestone just because it
**Success path** (all verifications passed — continue with steps 10–14):
10. For each requirement whose status changed in step 9, call `sf_requirement_update` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted.
11. **Persist completion through `sf_complete_milestone`.** Call it with the parameters below as soon as steps 2, 4, 5, 6, and 10 are satisfied. Do not keep reading historical commits or re-running broad test suites after the requirements are updated. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
10. For each requirement whose status changed in step 9, call `update_requirement` with the requirement ID and updated `status` and `validation` fields — the tool regenerates `.sf/REQUIREMENTS.md` automatically. Do this BEFORE completing the milestone so requirement updates are persisted.
11. **Persist completion through `complete_milestone`.** Call it with the parameters below as soon as steps 2, 4, 5, 6, and 10 are satisfied. Do not keep reading historical commits or re-running broad test suites after the requirements are updated. The tool updates the milestone status in the DB, renders `{{milestoneSummaryPath}}`, and validates all slices are complete before proceeding.
**Required parameters:**
- `milestoneId` (string) — Milestone ID (e.g. M001)

View file

@ -28,13 +28,13 @@ Then:
3. Run all applicable slice-level verification checks defined in the slice plan. All applicable checks must pass before marking the slice done. If any fail, fix them first. If the inlined context includes **Skipped Tasks**, do not execute verification that belongs only to those skipped tasks; record the evidence gap in the slice summary and UAT instead. Task artifacts use a **flat file layout** directly inside `tasks/` (for example `T01-SUMMARY.md`, `T02-SUMMARY.md`) rather than per-task subdirectories. If you need to count or re-read task summaries during verification, use `find .sf/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` or `ls .sf/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks/*-SUMMARY.md`. Never use `tasks/*/SUMMARY.md` — that glob expects subdirectories that do not exist.
4. If the slice plan includes observability/diagnostic surfaces, confirm they work. Skip this for simple slices that don't have observability sections.
5. Address every gate listed in the **Gates to Close** section above — each gate maps to a specific slice-summary section the handler inspects (for example, Q8 maps to **Operational Readiness**: health signal, failure signal, recovery procedure, and monitoring gaps). Leaving a section empty records the gate as `omitted`.
6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `sf_requirement_update` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.sf/REQUIREMENTS.md` directly — the engine renders it from the database.
7. Prepare the slice completion content you will pass to `sf_slice_complete` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts.
6. If this slice produced evidence that a requirement changed status (Active → Validated, Active → Deferred, etc.), call `update_requirement` with the requirement ID, updated `status`, and `validation` evidence. Do NOT write `.sf/REQUIREMENTS.md` directly — the engine renders it from the database.
7. Prepare the slice completion content you will pass to `complete_slice` using the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`. Do **not** manually write `{{sliceSummaryPath}}`. Do **not** manually write `{{sliceUatPath}}` — the DB-backed tool is the canonical write path for both artifacts.
8. Draft the UAT content you will pass as `uatContent` — a concrete UAT script with real test cases derived from the slice plan and task summaries. Include preconditions, numbered steps with expected outcomes, and edge cases. This must NOT be a placeholder or generic template — tailor every test case to what this slice actually built.
9. Review task summaries for `key_decisions`. Append any significant decisions to `.sf/DECISIONS.md` if missing.
10. Review task summaries for patterns, gotchas, or non-obvious lessons learned. If any would save future agents from repeating investigation or hitting the same issues, append them to `.sf/KNOWLEDGE.md`. Only add entries that are genuinely useful — don't pad with obvious observations.
10b. Scan task summaries and the slice's activity log for sf-internal anomalies that the per-task agents may not have reported individually — repeated `Git stage failed`, `Verification failed … advisory`, `Safety: N unexpected file change(s)`, brittle gate predicates, etc. For any genuine sf-the-tool defect that surfaced during this slice but was NOT already filed via `report_issue`, file it now via `report_issue` with appropriate severity. This is the slice-level sweep — task-level agents file individual reports during execution; the slice-close agent catches systemic issues only visible across multiple tasks.
11. Call `sf_slice_complete` with the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`, plus any optional enrichment fields you have. Do NOT manually mark the roadmap checkbox — the tool writes to the DB, renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}`, and updates the ROADMAP.md projection automatically.
11. Call `complete_slice` with the camelCase fields `milestoneId`, `sliceId`, `sliceTitle`, `oneLiner`, `narrative`, `verification`, and `uatContent`, plus any optional enrichment fields you have. Do NOT manually mark the roadmap checkbox — the tool writes to the DB, renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}`, and updates the ROADMAP.md projection automatically.
12. Do not run git commands — the system commits your changes and handles any merge after this unit succeeds.
13. Update `.sf/PROJECT.md` if it exists — refresh current state if needed: use the `write` tool with `path: ".sf/PROJECT.md"` and `content` containing the full updated document reflecting current project state. Do NOT use the `edit` tool for this — PROJECT.md is a full-document refresh.
@ -42,7 +42,7 @@ Then:
**File system safety:** Task summaries are preloaded in the inlined context above. Task artifacts use a **flat file layout** — files such as `T01-SUMMARY.md` and `T02-SUMMARY.md` live directly inside the `tasks/` directory, not inside per-task subdirectories like `tasks/T01/SUMMARY.md`. If you need to re-read any of them, use `find .sf/milestones/{{milestoneId}}/slices/{{sliceId}}/tasks -name "*-SUMMARY.md"` to list file paths first. Never use `tasks/*/SUMMARY.md`, and never pass `{{slicePath}}` or any other directory path directly to the `read` tool. The `read` tool only accepts file paths, not directories.
**You MUST call `sf_slice_complete` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.**
**You MUST call `complete_slice` with the slice summary and UAT content before finishing. The tool persists to both DB and disk and renders `{{sliceSummaryPath}}` and `{{sliceUatPath}}` automatically.**
When done, say: "Slice {{sliceId}} complete."

View file

@ -41,11 +41,11 @@ Record the deploy run to the database:
If the deploy succeeded:
- Write `deployed_url` to the deploy_runs row.
- Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and a brief deploy report as content.
- Call `save_summary` with `milestone_id: {{milestoneId}}`, `artifact_type: "DEPLOY"`, and a brief deploy report as content.
If the deploy failed:
- Set `status = 'failed'` in deploy_runs.
- Call `sf_summary_save` with the failure output as content.
- Call `save_summary` with the failure output as content.
- Output `<turn_status>blocked</turn_status>` — do NOT attempt to fix the deploy failure inline; it requires a repair unit.
### Report sf-internal observations

View file

@ -10,7 +10,7 @@ Apply these skills throughout this session:
Run the full four-phase codebase analysis before planning any milestones: (1) orientation map, (2) ultra-granular critical path analysis, (3) technical debt inventory with priority scores, (4) test coverage gaps. Update `.sf/CODEBASE.md` with verified findings, file descriptions for active paths, critical runtime boundaries, verification commands, and skill needs. This is the canonical project-knowledge base for all planning decisions.
### `architecture-planning`
Map the architecture (C4 Level 1-2) before designing milestones. Identify deep vs shallow modules, coupling problems, boundary violations. Every significant architectural decision made during planning gets an ADR in `docs/adr/`. Update `.sf/DECISIONS.md` via `sf_decision_save` for architectural decisions.
Map the architecture (C4 Level 1-2) before designing milestones. Identify deep vs shallow modules, coupling problems, boundary violations. Every significant architectural decision made during planning gets an ADR in `docs/adr/`. Update `.sf/DECISIONS.md` via `save_decision` for architectural decisions.
### `pm-planning`
Apply the `pm-planning` skill throughout this session. Key frameworks to use:
@ -194,7 +194,7 @@ For multi-milestone projects, requirements should span the full vision. Requirem
## PM Strategy Memory
Research findings that shaped planning decisions are saved via `sf_summary_save` with `artifact_type: "RESEARCH"`. The orchestrator persists them to both DB and disk. Do not create separate strategy files in `.sf/`.
Research findings that shaped planning decisions are saved via `save_summary` with `artifact_type: "RESEARCH"`. The orchestrator persists them to both DB and disk. Do not create separate strategy files in `.sf/`.
## Scope Assessment
@ -226,9 +226,9 @@ In a single pass:
Preserve the specification's exact terminology, emphasis, and specific framing. Do not paraphrase domain-specific language into generics. If the spec said "craft feel," write "craft feel" — not "high-quality user experience." The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision.
4. If `depth_verification_{{milestoneId}}_confirm` was confirmed, write `{{contextPath}}` — use the **Context** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during research. Include an "Assumptions" section documenting every judgment call.
5. If depth verification was not confirmed, call `sf_summary_save` with `artifact_type: "CONTEXT-DRAFT"` and the draft content as `content` — the tool writes `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT-DRAFT.md` to disk. Include the project-knowledge evidence, confidence level, assumptions, open questions, and what must be researched next. Do **not** call `sf_plan_milestone`. End with: "Milestone {{milestoneId}} drafted for discussion."
6. Only after confirmed final context, call `sf_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters.
7. For each architectural or pattern decision, call `sf_decision_save` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
5. If depth verification was not confirmed, call `save_summary` with `artifact_type: "CONTEXT-DRAFT"` and the draft content as `content` — the tool writes `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT-DRAFT.md` to disk. Include the project-knowledge evidence, confidence level, assumptions, open questions, and what must be researched next. Do **not** call `plan_milestone`. End with: "Milestone {{milestoneId}} drafted for discussion."
6. Only after confirmed final context, call `plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters.
7. For each architectural or pattern decision, call `save_decision` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
8. {{commitInstruction}}
After writing final context and roadmap, say exactly: "Milestone {{milestoneId}} ready." — nothing else. If you wrote a draft, say exactly: "Milestone {{milestoneId}} drafted for discussion." — nothing else.
@ -240,12 +240,12 @@ After writing final context and roadmap, say exactly: "Milestone {{milestoneId}}
1. For each milestone, call `new_milestone_id` to get its ID — never invent milestone IDs manually. Then `mkdir -p .sf/milestones/<ID>/slices` for each.
2. Write `.sf/PROJECT.md` — use the **Project** output template below.
3. Write `.sf/REQUIREMENTS.md` — use the **Requirements** output template below. Capture Active, Deferred, Out of Scope, and any already Validated requirements. Later milestones may have provisional ownership where slice plans do not exist yet.
4. For any architectural or pattern decisions, call `sf_decision_save` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
4. For any architectural or pattern decisions, call `save_decision` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
#### Phase 2: Primary milestone
5. For the primary milestone (the first in sequence), write a full `CONTEXT.md` only if `depth_verification_{{milestoneId}}_confirm` was confirmed. If not confirmed, write `CONTEXT-DRAFT.md`, do not call `sf_plan_milestone`, and stop.
6. After confirmed final context, call `sf_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done.
5. For the primary milestone (the first in sequence), write a full `CONTEXT.md` only if `depth_verification_{{milestoneId}}_confirm` was confirmed. If not confirmed, write `CONTEXT-DRAFT.md`, do not call `plan_milestone`, and stop.
6. After confirmed final context, call `plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done.
#### MANDATORY: depends_on Frontmatter in CONTEXT.md
@ -313,7 +313,7 @@ After writing final context and roadmap, say exactly: "Milestone {{milestoneId}}
- **Investigate thoroughly** — scout codebase, check library docs, web search. Same rigor as interactive mode.
- **Build project knowledge first** — use Sift/grep/lsp evidence to identify stack signals, critical paths, verification commands, skill needs, file descriptions, and unresolved gaps before writing context. Update `.sf/CODEBASE.md` only when you need a refreshed durable fallback snapshot.
- **Do focused research** — identify table stakes, domain standards, omissions, scope traps. Same rigor as interactive mode.
- **Use proper tools**`sf_plan_milestone` for roadmaps, `sf_decision_save` for decisions, `new_milestone_id` for IDs
- **Use proper tools**`plan_milestone` for roadmaps, `save_decision` for decisions, `new_milestone_id` for IDs
- **Print artifacts in chat** — requirements table, roadmap preview, depth summary. The TUI scrollback is the user's audit trail.
- **Use depends_on frontmatter** for multi-milestone sequences
- **Anti-reduction rule** — if the spec describes a big vision, plan the big vision. Phase complexity — don't cut it.

View file

@ -68,7 +68,7 @@ If an uncertainty is low-risk or would not change the next artifact, do not ask
After each answer, summarize what materially changed in one concise sentence before continuing. Then update the working context, investigate any newly-opened unknown, and either advance to the next gate/artifact or ask the next focused round.
**Incremental persistence:** After every 2 question rounds (across any layer), silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` using `sf_summary_save` with `artifact_type: "CONTEXT-DRAFT"` and `milestone_id: "{{milestoneId}}"`. This protects confirmed work against session crashes. Do NOT mention this save to the user.
**Incremental persistence:** After every 2 question rounds (across any layer), silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` using `save_summary` with `artifact_type: "CONTEXT-DRAFT"` and `milestone_id: "{{milestoneId}}"`. This protects confirmed work against session crashes. Do NOT mention this save to the user.
### Identify Work Type
@ -344,8 +344,8 @@ When writing CONTEXT.md, include structured sections that map to the discussion
These sections are in addition to whatever other context the discussion surfaced.
4. Write `{{contextPath}}` — use the **Context** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during discussion.
5. Call `sf_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters.
6. For each architectural or pattern decision made during discussion, call `sf_decision_save` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
5. Call `plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters.
6. For each architectural or pattern decision made during discussion, call `save_decision` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
7. {{commitInstruction}}
After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Autonomous mode will start automatically.
@ -359,12 +359,12 @@ Once the user confirms the milestone split:
1. For each milestone, call `new_milestone_id` to get its ID — never invent milestone IDs manually. Then `mkdir -p .sf/milestones/<ID>/slices`.
2. Write `.sf/PROJECT.md` — use the **Project** output template below.
3. Write `.sf/REQUIREMENTS.md` — use the **Requirements** output template below. Capture Active, Deferred, Out of Scope, and any already Validated requirements. Later milestones may have provisional ownership where slice plans do not exist yet.
4. For any architectural or pattern decisions made during discussion, call `sf_decision_save` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
4. For any architectural or pattern decisions made during discussion, call `save_decision` — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
#### Phase 2: Primary milestone
5. Write a full `CONTEXT.md` for the primary milestone (the one discussed in depth).
6. Call `sf_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done.
6. Call `plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done.
#### MANDATORY: depends_on Frontmatter in CONTEXT.md

View file

@ -7,10 +7,10 @@ Rules:
2. Read before edit.
3. Prefer fixing authoritative artifacts over masking warnings.
4. For missing summaries or UAT files, generate the real artifact from existing slice/task context when possible — do not leave placeholders if you can reconstruct the real content.
5. For a missing milestone `CONTEXT.md` when the milestone is already past `pre-planning` (phase is `executing`, `summarizing`, `validating-milestone`, or `completing-milestone`): the artifact was skipped during bootstrap and must be reconstructed before execution can resume. Read `PROJECT.md`, `REQUIREMENTS.md`, the milestone's `ROADMAP.md`, and any slice-level context on disk, then call `sf_summary_save` with `artifact_type: "CONTEXT"` and the reconstructed context as `content` — the tool writes `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT.md` to disk and persists to DB. Do not leave a stub — the plan gate will reject it on the next cycle.
5. For a missing milestone `CONTEXT.md` when the milestone is already past `pre-planning` (phase is `executing`, `summarizing`, `validating-milestone`, or `completing-milestone`): the artifact was skipped during bootstrap and must be reconstructed before execution can resume. Read `PROJECT.md`, `REQUIREMENTS.md`, the milestone's `ROADMAP.md`, and any slice-level context on disk, then call `save_summary` with `artifact_type: "CONTEXT"` and the reconstructed context as `content` — the tool writes `.sf/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT.md` to disk and persists to DB. Do not leave a stub — the plan gate will reject it on the next cycle.
6. After each repair cluster, verify the relevant invariant directly from disk.
7. When done, rerun `/doctor {{doctorCommandSuffix}}` mentally by ensuring the remaining issue set for this scope is reduced or cleared.
8. Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `sf_milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes.
8. Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes.
## Doctor Summary

View file

@ -45,7 +45,7 @@ Then:
- If you need a one-off script, scratch file, generated fixture, or temporary helper to understand or verify the work, either delete it before completion or promote it into the durable artifact named by the task plan.
- Do not leave duplicate sources of truth. When temporary/seed data is normalized into a canonical location, update downstream code to read only the canonical path and remove or clearly mark the old copy as non-authoritative.
- Do not satisfy verification with an ad-hoc helper when the task asks for a durable harness, command, test, or report. The durable planned artifact must own the repeatable check.
- Before calling `sf_task_complete` with `milestoneId`, `sliceId`, and `taskId`, inspect `git status --short` and make sure every changed/untracked file is intentional, in-scope, and either listed in the task plan/summary or explicitly explained as a local adaptation.
- Before calling `complete_task` with `milestoneId`, `sliceId`, and `taskId`, inspect `git status --short` and make sure every changed/untracked file is intentional, in-scope, and either listed in the task plan/summary or explicitly explained as a local adaptation.
6. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail).
7. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply.
@ -88,8 +88,8 @@ Then:
18. If you made an architectural, pattern, library, or observability decision during this task that downstream work should know about, append it to `.sf/DECISIONS.md` (read the template at `~/.sf/agent/extensions/sf/templates/decisions.md` if the file doesn't exist yet). Not every task produces decisions — only append when a meaningful choice was made.
19. If you discover a non-obvious rule, recurring gotcha, or useful pattern during execution, append it to `.sf/KNOWLEDGE.md`. Only add entries that would save future agents from repeating your investigation. Don't add obvious things.
20. Read the template at `~/.sf/agent/extensions/sf/templates/task-summary.md`
21. Use that template to prepare the completion content you will pass to `sf_task_complete` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you.
22. Call `sf_task_complete` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically.
21. Use that template to prepare the completion content you will pass to `complete_task` using the camelCase fields `milestoneId`, `sliceId`, `taskId`, `oneLiner`, `narrative`, `verification`, and `verificationEvidence`. Do **not** manually write `{{taskSummaryPath}}` — the DB-backed tool is the canonical write path and renders the summary file for you.
22. Call `complete_task` with milestoneId, sliceId, taskId, and the completion fields derived from the template. This is your final required step — do NOT manually edit PLAN.md checkboxes. The tool marks the task complete, updates the DB, renders `{{taskSummaryPath}}`, and updates PLAN.md automatically.
23. Do not run git commands — the system reads your task summary after completion and creates a meaningful commit from it (type inferred from title, message from your one-liner, key files from frontmatter). Write a clear, specific one-liner in the summary — it becomes the commit message.
All work stays in your working directory: `{{workingDirectory}}`.
@ -98,7 +98,7 @@ All work stays in your working directory: `{{workingDirectory}}`.
{{escalationGuidance}}
**You MUST call `sf_task_complete` before finishing. Do not manually write `{{taskSummaryPath}}`.**
**You MUST call `complete_task` before finishing. Do not manually write `{{taskSummaryPath}}`.**
When done, say: "Task {{taskId}} complete."
@ -106,6 +106,6 @@ When done, say: "Task {{taskId}} complete."
**After completing the task, output exactly one of these markers to signal state to the harness:**
- `<turn_status>complete</turn_status>` if task verification passed and you called `sf_task_complete`
- `<turn_status>complete</turn_status>` if task verification passed and you called `complete_task`
- `<turn_status>blocked</turn_status>` if you discovered a blocker (missing prereq, broken upstream, third-party failure, or plan invalid)
- `<turn_status>giving_up</turn_status>` if you've tried multiple approaches and are out of reasonable next steps without human input

View file

@ -116,7 +116,7 @@ A unit dispatched more than once (`type/id` appears multiple times) indicates a
5. **Read the actual SF source code** at `{{sfSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
**DB inspection:** If you need to check DB state as part of investigation, use `sf_milestone_status` — never run `sqlite3 .sf/sf.db` or `node -e require('better-sqlite3')` directly. The engine holds a WAL write lock; direct access will either fail or return stale data.
**DB inspection:** If you need to check DB state as part of investigation, use `milestone_status` — never run `sqlite3 .sf/sf.db` or `node -e require('better-sqlite3')` directly. The engine holds a WAL write lock; direct access will either fail or return stale data.
6. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.

View file

@ -1,4 +1,4 @@
Complete slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Your working directory is `{{workingDirectory}}` — all file operations must use this path. All tasks are done. Your slice summary is the primary record of what was built — downstream agents (reassess-roadmap, future slice researchers) read it to understand what this slice delivered and what to watch out for. Use the **Slice Summary** and **UAT** output templates below to understand the expected structure. {{skillActivation}} Call `sf_slice_complete` to record completion — the tool writes `{{sliceId}}-SUMMARY.md`, `{{sliceId}}-UAT.md`, and toggles the roadmap checkbox atomically. Fill the `UAT Type` plus `Not Proven By This UAT` sections explicitly in `uatContent` so the artifact states what class of acceptance it covers and what still remains unproven. Review task summaries for `key_decisions` and ensure any significant ones are in `.sf/DECISIONS.md`. If the slice involved runtime behavior, fill the Operational Readiness section (Q8) in the summary: health signal, failure signal, recovery procedure, and monitoring gaps. Omit for simple slices. Do not commit or merge manually — the system handles this after the unit completes.
Complete slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Your working directory is `{{workingDirectory}}` — all file operations must use this path. All tasks are done. Your slice summary is the primary record of what was built — downstream agents (reassess-roadmap, future slice researchers) read it to understand what this slice delivered and what to watch out for. Use the **Slice Summary** and **UAT** output templates below to understand the expected structure. {{skillActivation}} Call `complete_slice` to record completion — the tool writes `{{sliceId}}-SUMMARY.md`, `{{sliceId}}-UAT.md`, and toggles the roadmap checkbox atomically. Fill the `UAT Type` plus `Not Proven By This UAT` sections explicitly in `uatContent` so the artifact states what class of acceptance it covers and what still remains unproven. Review task summaries for `key_decisions` and ensure any significant ones are in `.sf/DECISIONS.md`. If the slice involved runtime behavior, fill the Operational Readiness section (Q8) in the summary: health signal, failure signal, recovery procedure, and monitoring gaps. Omit for simple slices. Do not commit or merge manually — the system handles this after the unit completes.
### Report sf-internal observations

View file

@ -53,7 +53,7 @@ After each answer, summarize what materially changed in one concise sentence, up
After each round of answers, decide whether you already have enough depth to write a strong context file.
- **Incremental persistence:** After every 2 question rounds, silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` with your current understanding using `sf_summary_save` with `artifact_type: "CONTEXT-DRAFT"`. This protects against session crashes losing all confirmed work. Do NOT mention this save to the user — it's invisible bookkeeping. The final context file will overwrite it.
- **Incremental persistence:** After every 2 question rounds, silently save a `{{milestoneId}}-CONTEXT-DRAFT.md` with your current understanding using `save_summary` with `artifact_type: "CONTEXT-DRAFT"`. This protects against session crashes losing all confirmed work. Do NOT mention this save to the user — it's invisible bookkeeping. The final context file will overwrite it.
- If not ready, investigate any newly-opened unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
- Use a single wrap-up prompt only when you genuinely believe the depth checklist is satisfied or the user signals they want to stop.
- **If `{{structuredQuestionsAvailable}}` is `true` and you need that wrap-up prompt:** use `ask_user_questions` with options:
@ -121,6 +121,6 @@ Once the user confirms depth:
1. Use the **Context** output template below
2. `mkdir -p` the milestone directory if needed
3. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "CONTEXT"`, and the full context markdown as `content` — the tool writes the file to disk and persists to DB. Preserve the user's exact terminology, emphasis, and framing in the content. Do not paraphrase nuance into generic summaries. The context file is downstream agents' only window into this conversation.
3. Call `save_summary` with `milestone_id: {{milestoneId}}`, `artifact_type: "CONTEXT"`, and the full context markdown as `content` — the tool writes the file to disk and persists to DB. Preserve the user's exact terminology, emphasis, and framing in the content. Do not paraphrase nuance into generic summaries. The context file is downstream agents' only window into this conversation.
4. {{commitInstruction}}
5. Say exactly: `"{{milestoneId}} context written."` — nothing else.

View file

@ -1,6 +1,6 @@
**Working directory:** `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory. For `.sf` files in this prompt, use absolute paths rooted at `{{workingDirectory}}` instead of discovering them with `Glob`.
Discuss the **project** as a whole. Identify gray areas at the project level — vision, users, anti-goals, key constraints — ask the user about them, and produce `.sf/PROJECT.md` with the decisions via `sf_summary_save`. Use the **Project** output template below. If a `SF Skill Preferences` block is present in system context, use it to decide which skills to load and follow; do not override required artifact rules.
Discuss the **project** as a whole. Identify gray areas at the project level — vision, users, anti-goals, key constraints — ask the user about them, and produce `.sf/PROJECT.md` with the decisions via `save_summary`. Use the **Project** output template below. If a `SF Skill Preferences` block is present in system context, use it to decide which skills to load and follow; do not override required artifact rules.
This stage runs ONCE per project, before any milestone-level discussion. It produces the project-level context that all subsequent milestones, requirements, and roadmaps will reference.
@ -62,7 +62,7 @@ After each round, investigate further if any answer opens a new unknown, then as
After each round, decide whether you have enough depth to write a strong PROJECT.md.
- **Incremental persistence:** After every 2 question rounds, silently save `.sf/PROJECT-DRAFT.md` using `sf_summary_save` with `artifact_type: "PROJECT-DRAFT"` and no `milestone_id`. Crash protection. Do NOT mention this save to the user.
- **Incremental persistence:** After every 2 question rounds, silently save `.sf/PROJECT-DRAFT.md` using `save_summary` with `artifact_type: "PROJECT-DRAFT"` and no `milestone_id`. Crash protection. Do NOT mention this save to the user.
- If not ready, continue to the next round.
- Use a wrap-up prompt only when you believe the depth checklist below is satisfied or the user signals they want to stop.
@ -125,7 +125,7 @@ The depth verification is the only required confirmation gate. Do not add a seco
Once the user confirms depth:
1. Use the **Project** output template (inlined above).
2. Call `sf_summary_save` with `artifact_type: "PROJECT"` and the full project markdown as `content`; omit `milestone_id`. The tool writes `.sf/PROJECT.md` to disk and persists to DB. Preserve the user's exact terminology, emphasis, and framing.
2. Call `save_summary` with `artifact_type: "PROJECT"` and the full project markdown as `content`; omit `milestone_id`. The tool writes `.sf/PROJECT.md` to disk and persists to DB. Preserve the user's exact terminology, emphasis, and framing.
3. The `## Capability Contract` section MUST reference `.sf/REQUIREMENTS.md` — that file does not yet exist; the next stage (`discuss-requirements`) will produce it.
4. The `## Milestone Sequence` MUST list at least M001 with title and one-liner. Subsequent milestones may be listed as known intents; they will be elaborated in their own discuss-milestone stages.
5. Do NOT use `artifact_type: "CONTEXT"` and do NOT pass `milestone_id: "PROJECT"`; that creates a fake milestone named PROJECT.

View file

@ -1,6 +1,6 @@
**Working directory:** `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory. For `.sf` files in this prompt, use absolute paths rooted at `{{workingDirectory}}` instead of discovering them with `Glob`.
Discuss **project-level requirements**. Read `.sf/PROJECT.md` first — it is the authoritative source for vision, core value, anti-goals, and milestone sequence. All requirements must trace back to it. Identify gray areas about what capabilities the project must deliver, ask the user, and produce `.sf/REQUIREMENTS.md` via `sf_summary_save` using the v2 structured `R###` format. Use the **Requirements** output template below.
Discuss **project-level requirements**. Read `.sf/PROJECT.md` first — it is the authoritative source for vision, core value, anti-goals, and milestone sequence. All requirements must trace back to it. Identify gray areas about what capabilities the project must deliver, ask the user, and produce `.sf/REQUIREMENTS.md` via `save_summary` using the v2 structured `R###` format. Use the **Requirements** output template below.
This stage runs ONCE per project, after `discuss-project` and before any milestone-level work. It produces the explicit capability contract that all milestones, slices, and verification will reference.
@ -57,7 +57,7 @@ Ask **1–3 questions per round**. Each round targets one dimension:
### Round cadence
- **Incremental persistence:** After every 2 question rounds, silently save the current requirements draft using `sf_summary_save` with `artifact_type: "REQUIREMENTS-DRAFT"` and no `milestone_id`. Crash protection. Do NOT mention this save.
- **Incremental persistence:** After every 2 question rounds, silently save the current requirements draft using `save_summary` with `artifact_type: "REQUIREMENTS-DRAFT"` and no `milestone_id`. Crash protection. Do NOT mention this save.
- Continue rounds until the depth checklist is satisfied or the user signals stop.
---
@ -113,8 +113,8 @@ If they adjust, absorb and re-verify.
Once the user confirms:
1. Use the **Requirements** output template (inlined above) to render the final markdown in working memory.
2. Every entry must conform to the `R###` format with all listed fields. Use `sf_requirement_save` (NOT plain file edit) for each requirement so DB state is saved first.
3. After all `sf_requirement_save` calls complete, call `sf_summary_save` with `artifact_type: "REQUIREMENTS"`; omit `milestone_id`. The requirements table is the source of truth, and this tool renders `.sf/REQUIREMENTS.md` from DB state. Pass the rendered markdown as `content` for audit context only; do not rely on markdown to update DB rows.
2. Every entry must conform to the `R###` format with all listed fields. Use `save_requirement` (NOT plain file edit) for each requirement so DB state is saved first.
3. After all `save_requirement` calls complete, call `save_summary` with `artifact_type: "REQUIREMENTS"`; omit `milestone_id`. The requirements table is the source of truth, and this tool renders `.sf/REQUIREMENTS.md` from DB state. Pass the rendered markdown as `content` for audit context only; do not rely on markdown to update DB rows.
4. The file MUST contain all required sections: `## Active`, `## Validated`, `## Deferred`, `## Out of Scope`, `## Traceability`, `## Coverage Summary`. Empty sections are OK; missing sections are not.
5. Print the final coverage summary in chat: `Active: N | Validated: N | Deferred: N | Out of Scope: N | Mapped to slices: N | Unmapped active: N`.
6. Do NOT use `artifact_type: "CONTEXT"` and do NOT pass `milestone_id: "REQUIREMENTS"`; that creates a fake milestone instead of `.sf/REQUIREMENTS.md`.

View file

@ -45,7 +45,7 @@ After each answer, summarize what materially changed in one concise sentence, up
After each round of answers, decide whether you already have enough signal to write the slice context cleanly.
- **Incremental persistence:** After every 2 question rounds, silently save a draft `{{sliceId}}-CONTEXT-DRAFT.md` in `{{sliceDirPath}}` using `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "CONTEXT-DRAFT"`. This protects against session crashes losing confirmed work. Do NOT mention this to the user. The final context file will replace it.
- **Incremental persistence:** After every 2 question rounds, silently save a draft `{{sliceId}}-CONTEXT-DRAFT.md` in `{{sliceDirPath}}` using `save_summary` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "CONTEXT-DRAFT"`. This protects against session crashes losing confirmed work. Do NOT mention this to the user. The final context file will replace it.
- If not, investigate any new unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
- Ask a single wrap-up question only when you genuinely believe the slice is well understood or the user signals they want to stop.
- When you do ask it, offer two choices: "Write the context file" *(recommended when the slice is well understood)* or "One more pass". Use `ask_user_questions` if available, otherwise ask in plain text.
@ -60,7 +60,7 @@ Once the user has explicitly confirmed they are ready to write the context file:
1. Use the **Slice Context** output template below
2. `mkdir -p {{sliceDirPath}}`
3. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "CONTEXT"`, and the context as `content` — the tool writes the file to disk and persists to DB. Use the template structure, filling in:
3. Call `save_summary` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "CONTEXT"`, and the context as `content` — the tool writes the file to disk and persists to DB. Use the template structure, filling in:
- **Goal** — one sentence: what this slice delivers
- **Why this Slice** — why now, what it unblocks
- **Scope / In Scope** — what was confirmed in scope during the interview

View file

@ -1,4 +1,4 @@
Execute the next task: {{taskId}} ("{{taskTitle}}") in slice {{sliceId}} of milestone {{milestoneId}}. Read the task plan (`{{taskId}}-PLAN.md`), load relevant summaries from prior tasks, and execute each step. Before implementation, run the swarm opportunity check: use a 2-3 worker same-model `subagent({ tasks: [...] })` swarm only when the task splits into independent shards with explicit disjoint file/directory ownership, no shared-interface or lockfile edits, shard-local verification, and clear wall-clock savings; otherwise execute single-agent. If you swarm, give each worker its write scope and expected output files, then inspect `git status --short`, synthesize, resolve conflicts, and run final verification yourself. Verify must-haves when done. If the task touches UI, browser flows, DOM behavior, or user-visible web state, exercise the real flow in the browser, prefer `browser_batch` for obvious sequences, prefer `browser_assert` for explicit pass/fail verification, use `browser_diff` when an action's effect is ambiguous, and use browser diagnostics when validating async or failure-prone UI. If you made an architectural, pattern, or library decision, append it to `.sf/DECISIONS.md`. Use the **Task Summary** output template below. Call `sf_task_complete` to record completion (it writes the summary, toggles the checkbox, and persists to DB atomically). {{skillActivation}} If running long and not all steps are finished, stop implementing and prioritize writing a clean partial summary over attempting one more step — a recoverable handoff is more valuable than a half-finished step with no documentation. 
If verification fails, debug methodically: form a hypothesis and test that specific theory before changing anything, change one variable at a time, read entire functions not just the suspect line, distinguish observable facts from assumptions, and if 3+ fixes fail without progress stop and reassess your mental model — list what you know for certain, what you've ruled out, and form fresh hypotheses. Don't fix symptoms — understand why something fails before changing code. If the task plan includes Failure Modes, Load Profile, or Negative Tests sections, implement and verify them: handle each dependency's error/timeout/malformed paths (Q5), protect against identified 10x breakpoints (Q6), and write specified negative test cases (Q7).
Execute the next task: {{taskId}} ("{{taskTitle}}") in slice {{sliceId}} of milestone {{milestoneId}}. Read the task plan (`{{taskId}}-PLAN.md`), load relevant summaries from prior tasks, and execute each step. Before implementation, run the swarm opportunity check: use a 2-3 worker same-model `subagent({ tasks: [...] })` swarm only when the task splits into independent shards with explicit disjoint file/directory ownership, no shared-interface or lockfile edits, shard-local verification, and clear wall-clock savings; otherwise execute single-agent. If you swarm, give each worker its write scope and expected output files, then inspect `git status --short`, synthesize, resolve conflicts, and run final verification yourself. Verify must-haves when done. If the task touches UI, browser flows, DOM behavior, or user-visible web state, exercise the real flow in the browser, prefer `browser_batch` for obvious sequences, prefer `browser_assert` for explicit pass/fail verification, use `browser_diff` when an action's effect is ambiguous, and use browser diagnostics when validating async or failure-prone UI. If you made an architectural, pattern, or library decision, append it to `.sf/DECISIONS.md`. Use the **Task Summary** output template below. Call `complete_task` to record completion (it writes the summary, toggles the checkbox, and persists to DB atomically). {{skillActivation}} If running long and not all steps are finished, stop implementing and prioritize writing a clean partial summary over attempting one more step — a recoverable handoff is more valuable than a half-finished step with no documentation. 
If verification fails, debug methodically: form a hypothesis and test that specific theory before changing anything, change one variable at a time, read entire functions not just the suspect line, distinguish observable facts from assumptions, and if 3+ fixes fail without progress stop and reassess your mental model — list what you know for certain, what you've ruled out, and form fresh hypotheses. Don't fix symptoms — understand why something fails before changing code. If the task plan includes Failure Modes, Load Profile, or Negative Tests sections, implement and verify them: handle each dependency's error/timeout/malformed paths (Q5), protect against identified 10x breakpoints (Q6), and write specified negative test cases (Q7).
### Report sf-internal observations

View file

@ -1,6 +1,6 @@
Plan milestone {{milestoneId}} ("{{milestoneTitle}}"). Read `.sf/DECISIONS.md` if it exists — respect existing decisions. Read `.sf/REQUIREMENTS.md` if it exists and treat Active requirements as the capability contract. If `REQUIREMENTS.md` is missing, treat that as a planning gap: derive the minimum requirement coverage from current project evidence, persist it through SF planning tools, and explicitly note missing coverage. Use the **Roadmap** output template below to shape the milestone planning payload you send to `sf_plan_milestone`. Start the `vision` field with the milestone purpose before implementation detail, include the structured `productResearch` payload when the work is product-facing, workflow-facing, developer-experience, or market-positioning, and make each slice `goal` state the slice purpose before mechanics. If the milestone changes how SF is driven, observed, integrated, or automated, keep the axes separate in the roadmap: surface (TUI/CLI/web/editor/machine), protocol (ACP/RPC/stdio/HTTP/wire), output format (text/json/stream-json), run control (manual/assisted/autonomous), and permission profile (restricted/normal/trusted/unrestricted). Call `sf_plan_milestone` to persist the milestone planning fields and render `{{milestoneId}}-ROADMAP.md` from DB state. Do **not** write `{{milestoneId}}-ROADMAP.md`, `ROADMAP.md`, or other planning artifacts manually. If planning produces structural decisions, append them to `.sf/DECISIONS.md`. {{skillActivation}} Fill the Horizontal Checklist section with cross-cutting concerns considered during planning (requirements re-read, decisions re-evaluated, graceful shutdown, revenue paths, auth boundary, shared resources, reconnection). Omit for trivial milestones.
Plan milestone {{milestoneId}} ("{{milestoneTitle}}"). Read `.sf/DECISIONS.md` if it exists — respect existing decisions. Read `.sf/REQUIREMENTS.md` if it exists and treat Active requirements as the capability contract. If `REQUIREMENTS.md` is missing, treat that as a planning gap: derive the minimum requirement coverage from current project evidence, persist it through SF planning tools, and explicitly note missing coverage. Use the **Roadmap** output template below to shape the milestone planning payload you send to `plan_milestone`. Start the `vision` field with the milestone purpose before implementation detail, include the structured `productResearch` payload when the work is product-facing, workflow-facing, developer-experience, or market-positioning, and make each slice `goal` state the slice purpose before mechanics. If the milestone changes how SF is driven, observed, integrated, or automated, keep the axes separate in the roadmap: surface (TUI/CLI/web/editor/machine), protocol (ACP/RPC/stdio/HTTP/wire), output format (text/json/stream-json), run control (manual/assisted/autonomous), and permission profile (restricted/normal/trusted/unrestricted). Call `plan_milestone` to persist the milestone planning fields and render `{{milestoneId}}-ROADMAP.md` from DB state. Do **not** write `{{milestoneId}}-ROADMAP.md`, `ROADMAP.md`, or other planning artifacts manually. If planning produces structural decisions, append them to `.sf/DECISIONS.md`. {{skillActivation}} Fill the Horizontal Checklist section with cross-cutting concerns considered during planning (requirements re-read, decisions re-evaluated, graceful shutdown, revenue paths, auth boundary, shared resources, reconnection). Omit for trivial milestones.
Before calling `sf_plan_milestone`, run a bounded **Vision Alignment Meeting** for the milestone and roadmap as a real multi-agent review. Use the `subagent` tool in `mode: "debate"` with `rounds: 2` and a separate task for each participant lens below. Do **not** merely simulate every participant inside this planner response. Use only supported agent names: `planner`, `reviewer`, `researcher`, and `scout`. Put the stakeholder role name inside the task text; do not invent agent names such as `combatant`, `delivery-lead`, `product-manager`, or `customer-panel`. If the `subagent` tool is unavailable or fails after one retry, record that explicitly in `trigger` and run the structured meeting inline as a degraded fallback. This is allowed to be broader and more nuanced than slice planning. Include at least these participant lenses:
Before calling `plan_milestone`, run a bounded **Vision Alignment Meeting** for the milestone and roadmap as a real multi-agent review. Use the `subagent` tool in `mode: "debate"` with `rounds: 2` and a separate task for each participant lens below. Do **not** merely simulate every participant inside this planner response. Use only supported agent names: `planner`, `reviewer`, `researcher`, and `scout`. Put the stakeholder role name inside the task text; do not invent agent names such as `combatant`, `delivery-lead`, `product-manager`, or `customer-panel`. If the `subagent` tool is unavailable or fails after one retry, record that explicitly in `trigger` and run the structured meeting inline as a degraded fallback. This is allowed to be broader and more nuanced than slice planning. Include at least these participant lenses:
- Product Manager
- User Advocate
- Customer Panel

View file

@ -1,4 +1,4 @@
Plan slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements the roadmap says this slice owns or supports, and ensure the plan delivers them. Read the roadmap boundary map, any existing context/research files, and dependency summaries. Use the **Slice Plan** and **Task Plan** output templates below. Decompose into tasks with must-haves. Fill the `Proof Level` and `Integration Closure` sections truthfully so the plan says what class of proof this slice really delivers and what end-to-end wiring still remains. If the slice changes how SF is driven, observed, integrated, or automated, fill `Interface Axes` and keep surface (TUI/CLI/web/editor/machine), protocol (ACP/RPC/stdio/HTTP/wire), output format (text/json/stream-json), run control (manual/assisted/autonomous), and permission profile (restricted/normal/trusted/unrestricted) separate. For each task, decide whether execution can safely swarm: mark it swarmable only if it can split into 2-3 independent shards with disjoint file/directory ownership, shard-local verification, and no shared-interface, lockfile, migration, generated-artifact, or sequencing conflict; otherwise make the task explicitly single-agent. Call `sf_plan_slice` to persist the slice plan — the tool writes `{{sliceId}}-PLAN.md` and individual `T##-PLAN.md` files to disk and persists to DB. The `sf_plan_slice` payload MUST include `planningMeeting` as a populated object; empty, null, or missing planningMeeting is not acceptable. Use the canonical M004 meeting roles: Trigger, Product Manager, User Advocate, Customer Panel, Business, Researcher, Delivery Lead, Partner, Combatant, Architect, Moderator, Recommended Route, and Confidence. 
The tool's Product Manager field is named `pm`, and the Confidence field is named `confidenceSummary`; keep existing tool field names while covering the canonical roles. If you are tempted to skip the meeting because the slice is simple, write a brief one-line per role explaining why it is simple. Do **not** write plan files manually — use the DB-backed tool so state stays consistent. If planning produces structural decisions, call `sf_decision_save` for each — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically. {{skillActivation}} Before finishing, self-audit the plan: every must-have maps to at least one task, every task has complete sections (steps, must-haves, verification, observability impact, inputs, and expected output), task ordering is consistent with no circular references, every pair of artifacts that must connect has an explicit wiring step, task scope targets 2-5 steps and 3-8 files (6-8 steps or 8-10 files — consider splitting; 10+ steps or 12+ files — must split), any swarmable task has disjoint Expected Output paths/directories and explains shard ownership, the plan honors locked decisions from context/research/decisions artifacts, the proof-level wording does not overclaim live integration if only fixture/contract proof is planned, every Active requirement this slice owns has at least one task with verification that proves it is met, and every task produces real user-facing progress — if the slice has a UI surface at least one task builds the real UI, if it has an API at least one task connects it to a real data source, and showing the completed result to a non-technical stakeholder would demonstrate real product progress rather than developer artifacts, and quality gate coverage — for non-trivial slices, Threat Surface (Q3: abuse, data exposure, input trust) and Requirement Impact (Q4: requirements touched, re-verify, decisions revisited) sections are present. 
For non-trivial tasks, Failure Modes (Q5), Load Profile (Q6), Negative Tests (Q7), and Interface Impact when relevant are filled in task plans.
Plan slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements the roadmap says this slice owns or supports, and ensure the plan delivers them. Read the roadmap boundary map, any existing context/research files, and dependency summaries. Use the **Slice Plan** and **Task Plan** output templates below. Decompose into tasks with must-haves. Fill the `Proof Level` and `Integration Closure` sections truthfully so the plan says what class of proof this slice really delivers and what end-to-end wiring still remains. If the slice changes how SF is driven, observed, integrated, or automated, fill `Interface Axes` and keep surface (TUI/CLI/web/editor/machine), protocol (ACP/RPC/stdio/HTTP/wire), output format (text/json/stream-json), run control (manual/assisted/autonomous), and permission profile (restricted/normal/trusted/unrestricted) separate. For each task, decide whether execution can safely swarm: mark it swarmable only if it can split into 2-3 independent shards with disjoint file/directory ownership, shard-local verification, and no shared-interface, lockfile, migration, generated-artifact, or sequencing conflict; otherwise make the task explicitly single-agent. Call `plan_slice` to persist the slice plan — the tool writes `{{sliceId}}-PLAN.md` and individual `T##-PLAN.md` files to disk and persists to DB. The `plan_slice` payload MUST include `planningMeeting` as a populated object; empty, null, or missing planningMeeting is not acceptable. Use the canonical M004 meeting roles: Trigger, Product Manager, User Advocate, Customer Panel, Business, Researcher, Delivery Lead, Partner, Combatant, Architect, Moderator, Recommended Route, and Confidence. The tool's Product Manager field is named `pm`, and the Confidence field is named `confidenceSummary`; keep existing tool field names while covering the canonical roles. 
If you are tempted to skip the meeting because the slice is simple, write a brief one-line per role explaining why it is simple. Do **not** write plan files manually — use the DB-backed tool so state stays consistent. If planning produces structural decisions, call `save_decision` for each — the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically. {{skillActivation}} Before finishing, self-audit the plan: every must-have maps to at least one task, every task has complete sections (steps, must-haves, verification, observability impact, inputs, and expected output), task ordering is consistent with no circular references, every pair of artifacts that must connect has an explicit wiring step, task scope targets 2-5 steps and 3-8 files (6-8 steps or 8-10 files — consider splitting; 10+ steps or 12+ files — must split), any swarmable task has disjoint Expected Output paths/directories and explains shard ownership, the plan honors locked decisions from context/research/decisions artifacts, the proof-level wording does not overclaim live integration if only fixture/contract proof is planned, every Active requirement this slice owns has at least one task with verification that proves it is met, and every task produces real user-facing progress — if the slice has a UI surface at least one task builds the real UI, if it has an API at least one task connects it to a real data source, and showing the completed result to a non-technical stakeholder would demonstrate real product progress rather than developer artifacts, and quality gate coverage — for non-trivial slices, Threat Surface (Q3: abuse, data exposure, input trust) and Requirement Impact (Q4: requirements touched, re-verify, decisions revisited) sections are present. For non-trivial tasks, Failure Modes (Q5), Load Profile (Q6), Negative Tests (Q7), and Interface Impact when relevant are filled in task plans.
### Report sf-internal observations

View file

@ -1,4 +1,4 @@
Research slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions, don't contradict them. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements this slice owns or supports and target research toward risks, unknowns, and constraints that could affect delivery of those requirements. {{skillActivation}} Use native `lsp` first for symbol lookup, references, and cross-file navigation. For direct text inspection use `rg`/`find` for targeted reads, or `scout` if the area is broad or unfamiliar. If the repository is checked out locally, GitHub code search is a scarce remote-only fallback: do not use GitHub `/search/code` for that local repo; use `git grep` for tracked-file global search, `rg` for broader worktree text search, plus `lsp`, `sift_search`, or `codebase_search` instead. GitHub's `code_search` bucket is small and separate from normal REST/GraphQL quotas, so use it only for repositories that are not on disk, dedupe repeated queries, and treat `403` rate-limit responses as a signal to wait for reset or continue with local evidence. If there are 2-3 independent unknowns, use a research swarm with parallel `scout`/`researcher` subagents and synthesize their findings here; do not swarm narrow sequence-dependent research. Check libraries DeepWiki-first: `ask_question` / `read_wiki_structure` / `read_wiki_contents` for any GitHub-hosted library; fall back to `resolve_library` / `get_library_docs` (Context7, capped at 1000 req/month free) for npm/pypi/crates packages DeepWiki doesn't have. Skip both for libraries already used in this codebase. Use the **Research** output template below. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the research content — the tool writes the file to disk and persists to DB. 
After `sf_summary_save` succeeds, stop immediately; do **not** call `new_milestone_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool.
Research slice {{sliceId}} ("{{sliceTitle}}") of milestone {{milestoneId}}. Read `.sf/DECISIONS.md` if it exists — respect existing decisions, don't contradict them. Read `.sf/REQUIREMENTS.md` if it exists — identify which Active requirements this slice owns or supports and target research toward risks, unknowns, and constraints that could affect delivery of those requirements. {{skillActivation}} Use native `lsp` first for symbol lookup, references, and cross-file navigation. For direct text inspection use `rg`/`find` for targeted reads, or `scout` if the area is broad or unfamiliar. If the repository is checked out locally, GitHub code search is a scarce remote-only fallback: do not use GitHub `/search/code` for that local repo; use `git grep` for tracked-file global search, `rg` for broader worktree text search, plus `lsp`, `sift_search`, or `codebase_search` instead. GitHub's `code_search` bucket is small and separate from normal REST/GraphQL quotas, so use it only for repositories that are not on disk, dedupe repeated queries, and treat `403` rate-limit responses as a signal to wait for reset or continue with local evidence. If there are 2-3 independent unknowns, use a research swarm with parallel `scout`/`researcher` subagents and synthesize their findings here; do not swarm narrow sequence-dependent research. Check libraries DeepWiki-first: `ask_question` / `read_wiki_structure` / `read_wiki_contents` for any GitHub-hosted library; fall back to `resolve_library` / `get_library_docs` (Context7, capped at 1000 req/month free) for npm/pypi/crates packages DeepWiki doesn't have. Skip both for libraries already used in this codebase. Use the **Research** output template below. Call `save_summary` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the research content — the tool writes the file to disk and persists to DB. 
After `save_summary` succeeds, stop immediately; do **not** call `new_milestone_id`, `plan_milestone`, `plan_slice`, `plan_task`, or any planning/creation tool.
**You are the scout.** A planner agent reads your output in a fresh context to decompose this slice into tasks. Write for the planner — surface key files, where the work divides naturally, what to build first, and how to verify. If the research doc is vague, the planner re-explores code you already read. If it's precise, the planner decomposes immediately.

View file

@ -83,12 +83,12 @@ Then:
2. {{skillActivation}}
3. Create the roadmap: start with the milestone purpose in the `vision` field, include a structured `productResearch` payload when applicable, then decompose into demoable vertical slices - as many as the work genuinely needs, no more. A simple feature might be 1 slice. Don't decompose for decomposition's sake.
4. Order by risk (high-risk first)
5. Call `sf_plan_milestone` to persist the milestone planning fields, slice rows, and **horizontal checklist** in the DB-backed planning path. Every slice `goal` must state the slice purpose before implementation detail. If the milestone changes how SF is driven, observed, integrated, or automated, keep the axes separate in the roadmap: surface (TUI/CLI/web/editor/machine), protocol (ACP/RPC/stdio/HTTP/wire), output format (text/json/stream-json), run control (manual/assisted/autonomous), and permission profile (restricted/normal/trusted/unrestricted). Do **not** write `{{outputPath}}`, `ROADMAP.md`, or other planning artifacts manually - the planning tool owns roadmap rendering and persistence.
6. If planning produced structural decisions (e.g. slice ordering rationale, technology choices, scope exclusions), call `sf_decision_save` for each decision - the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
5. Call `plan_milestone` to persist the milestone planning fields, slice rows, and **horizontal checklist** in the DB-backed planning path. Every slice `goal` must state the slice purpose before implementation detail. If the milestone changes how SF is driven, observed, integrated, or automated, keep the axes separate in the roadmap: surface (TUI/CLI/web/editor/machine), protocol (ACP/RPC/stdio/HTTP/wire), output format (text/json/stream-json), run control (manual/assisted/autonomous), and permission profile (restricted/normal/trusted/unrestricted). Do **not** write `{{outputPath}}`, `ROADMAP.md`, or other planning artifacts manually - the planning tool owns roadmap rendering and persistence.
6. If planning produced structural decisions (e.g. slice ordering rationale, technology choices, scope exclusions), call `save_decision` for each decision - the tool auto-assigns IDs and regenerates `.sf/DECISIONS.md` automatically.
### productResearch payload
When the milestone is product-facing, workflow-facing, developer-experience, or market-positioning, include `productResearch` in the `sf_plan_milestone` call. Do not bury this inside the meeting prose.
When the milestone is product-facing, workflow-facing, developer-experience, or market-positioning, include `productResearch` in the `plan_milestone` call. Do not bury this inside the meeting prose.
Required fields:
- `purpose`: why this research exists for this milestone
@ -137,7 +137,7 @@ Apply these when decomposing and ordering slices:
## Scheduled Follow-ups
Milestones can declare temporal follow-ups via an optional `schedule` field passed to `sf_plan_milestone`. These create schedule entries that fire at specific times without manual intervention.
Milestones can declare temporal follow-ups via an optional `schedule` field passed to `plan_milestone`. These create schedule entries that fire at specific times without manual intervention.
```yaml
schedule:
@ -170,7 +170,7 @@ Use schedule follow-ups for: post-launch adoption checks, periodic audits, re-va
If the roadmap has only one slice, also plan the slice and its tasks inline during this unit - don't leave them for a separate planning session.
1. After `sf_plan_milestone` returns, immediately call `sf_plan_slice` for S01 with the full task breakdown
1. After `plan_milestone` returns, immediately call `plan_slice` for S01 with the full task breakdown
2. Use the **Slice Plan** and **Task Plan** output templates from the inlined context above to structure the tool call parameters
3. For simple slices, keep the plan lean - omit Proof Level, Integration Closure, and Observability sections if they would all be "none". Executable verification commands are sufficient.

View file

@ -54,7 +54,7 @@ Narrate your decomposition reasoning — why you're grouping work this way, what
### Output Contract: planningMeeting
The `sf_plan_slice` payload MUST include `planningMeeting` as a populated object. Empty, null, or missing planningMeeting is not acceptable.
The `plan_slice` payload MUST include `planningMeeting` as a populated object. Empty, null, or missing planningMeeting is not acceptable.
Use the canonical M004 meeting roles: Trigger, Product Manager, User Advocate, Customer Panel, Business, Researcher, Delivery Lead, Partner, Combatant, Architect, Moderator, Recommended Route, and Confidence. The tool's Product Manager field is named `pm`, and the Confidence field is named `confidenceSummary`; keep existing tool field names while covering the canonical roles.
@ -91,7 +91,7 @@ Then:
- Observability Impact section **only if the task touches runtime boundaries, async flows, or error paths** — omit it otherwise
- Interface Impact section **only if the task changes a surface, protocol, output format, run-control mode, or permission profile** — omit it otherwise
- Swarm guidance when relevant: if a task can safely split into 2-3 independent execution shards, say so in the task plan's Steps or Description with explicit file/directory ownership per shard. If the work touches shared interfaces, lockfiles, migrations, generated artifacts, or sequence-dependent code, state that it should execute single-agent.
7. **Run adversarial review before persisting the plan.** Record all three lenses in the `adversarialReview` payload you send to `sf_plan_slice`. Each role has a purpose and depth contract — a review that agrees without raising specific objections is a rubber stamp, not a review.
7. **Run adversarial review before persisting the plan.** Record all three lenses in the `adversarialReview` payload you send to `plan_slice`. Each role has a purpose and depth contract — a review that agrees without raising specific objections is a rubber stamp, not a review.
- **Partner:** strongest case for why this plan is sufficient. *Must cite specific evidence from the code you explored — file paths, function names, test coverage gaps, or prior slice learnings. Not just "the plan looks good."*
- **Combatant:** attack the premise first. *Must name at least 3 plausible alternative root causes, failure modes, or plan-shape mistakes. Each must have a concrete scenario (not "might fail") and the cheapest falsifier for each.*
- **Architect:** after reading partner + combatant, state the system-fit risk, sequencing risk, or missing integration proof. *Must name the specific subsystems and coupling points this plan touches, and identify at least one integration seam that could break.*
@ -109,7 +109,7 @@ Then:
- **Recommended Route:** one of `discussing`, `researching`, `planning`
- **Confidence:** concise post-meeting confidence summary
- Keep it bounded: one round is normal, two is the limit. If the meeting route is `discussing` or `researching`, persist the draft anyway so the system keeps the context, but do not pretend the slice is execution-ready.
9. **Persist planning state through `sf_plan_slice`.** Call it with the full slice planning payload (goal, adversarialReview, populated planningMeeting, demo, must-haves, verification, tasks, and metadata). The tool inserts all tasks in the same transaction, writes to the DB, and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` files automatically. Do **not** call `sf_plan_task` separately — `sf_plan_slice` handles task persistence. Do **not** rely on direct `PLAN.md` writes as the source of truth; the DB-backed tool is the canonical write path for slice and task planning state.
9. **Persist planning state through `plan_slice`.** Call it with the full slice planning payload (goal, adversarialReview, populated planningMeeting, demo, must-haves, verification, tasks, and metadata). The tool inserts all tasks in the same transaction, writes to the DB, and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` files automatically. Do **not** call `plan_task` separately — `plan_slice` handles task persistence. Do **not** rely on direct `PLAN.md` writes as the source of truth; the DB-backed tool is the canonical write path for slice and task planning state.
10. **Self-audit the plan.** Walk through each check — if any fail, fix the plan files before moving on:
- **Completion semantics:** If every task were completed exactly as written, the slice goal/demo should actually be true.
- **Requirement coverage:** Every must-have in the slice maps to at least one task. No must-have is orphaned. If `REQUIREMENTS.md` exists, every Active requirement this slice owns maps to at least one task.
@ -127,9 +127,9 @@ Then:
The slice directory and tasks/ subdirectory already exist. Do NOT mkdir. All work stays in your working directory: `{{workingDirectory}}`.
**Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in autonomous mode — there is no human available to answer questions. Make reasonable assumptions and document them in the plan. If a decision genuinely requires human input, write a note in the relevant task's description and call `sf_plan_slice` with what you have.
**Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in autonomous mode — there is no human available to answer questions. Make reasonable assumptions and document them in the plan. If a decision genuinely requires human input, write a note in the relevant task's description and call `plan_slice` with what you have.
**You MUST call `sf_plan_slice` to persist the planning state before finishing.**
**You MUST call `plan_slice` to persist the planning state before finishing.**
### Report sf-internal observations

View file

@ -23,7 +23,7 @@ You must inspect the codebase enough to confirm whether declared product capabil
- Use text search (`rg`) for docs, deployment scripts, runbooks, CI workflows, build targets, and evidence strings.
- Use DeepWiki, Context7, package-intelligence, or project-specific documentation tools when configured for external library/package questions. Do not route local repo file/symbol work through MCP when native `lsp`, AST, and text search cover it.
- Do not rely only on preloaded docs. If a required capability is declared, look for concrete implementation, tests, config, deploy, and operational evidence.
- Do not edit source files in this audit. The only write path is the `sf_product_audit` tool.
- Do not edit source files in this audit. The only write path is the `audit_product` tool.
## Evidence Rules
@ -37,7 +37,7 @@ You must inspect the codebase enough to confirm whether declared product capabil
## Required Output
Call `sf_product_audit` exactly once with:
Call `audit_product` exactly once with:
```json
{

View file

@ -8,7 +8,7 @@ Before asking "What do you want to add?", check the existing milestones context
1. Tell the user which milestones have draft contexts and briefly summarize what each draft contains (read the draft file).
2. Use `ask_user_questions` to ask per-draft milestone:
- **"Discuss now"** — Treat this draft as the primary topic. Read the draft content, use it as seed material, and conduct a focused discussion following the standard discussion flow (reflection → investigation → questioning → depth verification → requirements → roadmap). After the discussion, call `sf_summary_save` with the milestone ID and `artifact_type: "CONTEXT"` to write the full context — then delete the `CONTEXT-DRAFT.md` file. The milestone is then ready for auto-planning.
- **"Discuss now"** — Treat this draft as the primary topic. Read the draft content, use it as seed material, and conduct a focused discussion following the standard discussion flow (reflection → investigation → questioning → depth verification → requirements → roadmap). After the discussion, call `save_summary` with the milestone ID and `artifact_type: "CONTEXT"` to write the full context — then delete the `CONTEXT-DRAFT.md` file. The milestone is then ready for auto-planning.
- **"Leave for later"** — Keep the draft as-is. The user will discuss it in a future session. Autonomous mode will continue to pause when it reaches this milestone.
3. Handle all draft discussions before proceeding to new queue work.
4. If no drafts exist in the context, skip this section entirely and proceed to "What do you want to add?"
@ -111,7 +111,7 @@ The user confirms or corrects before you write. One depth verification per miles
Once the user is satisfied, in a single pass for **each** new milestone:
1. Call `new_milestone_id` to get the milestone ID — never invent milestone IDs manually. Then `mkdir -p .sf/milestones/<ID>/slices`.
2. Call `sf_summary_save` with `milestone_id: <ID>`, `artifact_type: "CONTEXT"`, and the full context markdown as `content` — the tool computes the file path and persists to both DB and disk. Capture intent, scope, risks, constraints, integration points, and relevant requirements in the content. Mark the status as "Queued — pending autonomous mode execution." **If this milestone depends on other milestones, include YAML frontmatter with `depends_on` in the content:**
2. Call `save_summary` with `milestone_id: <ID>`, `artifact_type: "CONTEXT"`, and the full context markdown as `content` — the tool computes the file path and persists to both DB and disk. Capture intent, scope, risks, constraints, integration points, and relevant requirements in the content. Mark the status as "Queued — pending autonomous mode execution." **If this milestone depends on other milestones, include YAML frontmatter with `depends_on` in the content:**
```yaml
---
depends_on: [M001, M002]

View file

@ -23,7 +23,7 @@ You are executing a SF quick task — a lightweight, focused unit of work outsid
- Commit logical units separately if the task involves distinct changes.
- Quick tasks run outside the autonomous mode lifecycle — there is no system auto-commit, so commit directly here.
7. Write a brief summary to `{{summaryPath}}`:
- Quick tasks operate outside the milestone/slice/task DB structure, so `sf_summary_save` (which requires a `milestone_id`) cannot be used here. Write the file directly.
- Quick tasks operate outside the milestone/slice/task DB structure, so `save_summary` (which requires a `milestone_id`) cannot be used here. Write the file directly.
```markdown
# Quick Task: {{description}}

View file

@ -26,7 +26,7 @@ You are executing **multiple tasks in parallel** for this slice. The task graph
2. **Wait for all subagents** to complete.
3. **Verify each dispatched task's outputs** — check that expected files were created/modified, that verification commands pass where applicable, and that each task wrote its own `T##-SUMMARY.md`.
4. **Do not rewrite successful task summaries or duplicate completion tool calls.** Treat a subagent-written summary as authoritative for that task.
5. **If a failed task produced no summary, call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, the failed task's `task_id`, and `artifact_type: "SUMMARY"` — include `blocker_discovered: true` and clear failure details in the `content`. Do NOT call `sf_task_complete` for the failed task — leave it uncompleted so replan/retry has an authoritative record.
5. **If a failed task produced no summary, call `save_summary`** with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, the failed task's `task_id`, and `artifact_type: "SUMMARY"` — include `blocker_discovered: true` and clear failure details in the `content`. Do NOT call `complete_task` for the failed task — leave it uncompleted so replan/retry has an authoritative record.
6. **Preserve successful sibling tasks exactly as they landed.** Do not roll back good work because another parallel task failed.
7. **Do NOT create a batch commit.** The surrounding unit lifecycle owns commits; this parent batch agent should not invent a second commit layer.
8. **Report the batch outcome** — which tasks succeeded, which failed, and any output collisions or dependency surprises.

View file

@ -54,7 +54,7 @@ If all criteria have at least one remaining owning slice, the coverage check pas
**If the roadmap is still good:**
Use `sf_reassess_roadmap` with `verdict: "roadmap-confirmed"`, an empty `sliceChanges` object, and the assessment text — the tool writes the assessment to the DB and renders `{{assessmentPath}}`. If requirements exist, explicitly note whether requirement coverage remains sound.
Use `reassess_roadmap` with `verdict: "roadmap-confirmed"`, an empty `sliceChanges` object, and the assessment text — the tool writes the assessment to the DB and renders `{{assessmentPath}}`. If requirements exist, explicitly note whether requirement coverage remains sound.
### Parallel Follow-up Rule
@ -68,13 +68,13 @@ If the milestone is already on track to be honestly complete, prefer creating a
**If changes are needed:**
**Persist changes through `sf_reassess_roadmap`.** Pass: `milestoneId`, `completedSliceId`, `verdict` (e.g. "roadmap-adjusted"), `assessment` (text explaining the decision), and `sliceChanges` with `modified` (array of sliceId, title, risk, depends, demo), `added` (same shape), `removed` (array of slice ID strings). The tool structurally enforces preservation of completed slices, writes the assessment to the DB, re-renders `{{roadmapPath}}`, and renders `{{assessmentPath}}`.
**Persist changes through `reassess_roadmap`.** Pass: `milestoneId`, `completedSliceId`, `verdict` (e.g. "roadmap-adjusted"), `assessment` (text explaining the decision), and `sliceChanges` with `modified` (array of sliceId, title, risk, depends, demo), `added` (same shape), `removed` (array of slice ID strings). The tool structurally enforces preservation of completed slices, writes the assessment to the DB, re-renders `{{roadmapPath}}`, and renders `{{assessmentPath}}`.
If `.sf/REQUIREMENTS.md` exists and requirement ownership or status changed, update it.
{{commitInstruction}}
**DB access safety:** Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')`. Use `sf_milestone_status` to read current milestone and slice state. All roadmap mutations go through `sf_reassess_roadmap` — the tool writes to the DB and re-renders ROADMAP.md atomically.
**DB access safety:** Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')`. Use `milestone_status` to read current milestone and slice state. All roadmap mutations go through `reassess_roadmap` — the tool writes to the DB and re-renders ROADMAP.md atomically.
### Report sf-internal observations

View file

@ -65,16 +65,16 @@ Then:
3. Define slice-level verification — the objective stopping condition. Plan real test files with real assertions; for simple slices, executable commands are fine.
4. For non-trivial slices, plan observability / proof level / integration closure, threat surface, and requirement impact. Omit entirely for simple slices.
5. Decompose the slice into tasks that fit one context window each. Every task must have Why / Files / Do / Verify / Done-when, plus a task plan with description, steps, must-haves, verification, inputs (backtick-wrapped paths), and expected output (backtick-wrapped paths).
6. **Persist planning state through `sf_plan_slice`.** Call it with the full payload. The tool writes to the DB and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` automatically. Do NOT rely on direct `PLAN.md` writes.
6. **Persist planning state through `plan_slice`.** Call it with the full payload. The tool writes to the DB and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` automatically. Do NOT rely on direct `PLAN.md` writes.
7. **Self-audit the plan.** If every task were completed exactly as written, the slice goal/demo should actually be true. Every must-have maps to at least one task. Inputs and Expected Output are backtick-wrapped file paths.
8. If refinement produced structural decisions that diverge from the sketch, append them to `.sf/DECISIONS.md`.
9. {{commitInstruction}}
The slice directory and tasks/ subdirectory already exist. Do NOT mkdir.
**Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in autonomous mode — there is no human available to answer questions. Make reasonable assumptions and document them in the plan. If a decision genuinely requires human input, write a note in the relevant task's description and call `sf_plan_slice` with what you have.
**Autonomous execution:** Do not call `ask_user_questions` or `secure_env_collect`. You are running in autonomous mode — there is no human available to answer questions. Make reasonable assumptions and document them in the plan. If a decision genuinely requires human input, write a note in the relevant task's description and call `plan_slice` with what you have.
**You MUST call `sf_plan_slice` to persist the planning state before finishing.** After it returns successfully, the pipeline will automatically clear the sketch flag on the next state derivation (the on-disk PLAN file is the signal).
**You MUST call `plan_slice` to persist the planning state before finishing.** After it returns successfully, the pipeline will automatically clear the sketch flag on the next state derivation (the on-disk PLAN file is the signal).
When done, say: "Slice {{sliceId}} refined."

View file

@ -40,7 +40,7 @@ You are the release agent. Your job is to version, tag, changelog, and optionall
7. **Record to DB** — INSERT into `release_records (id, milestone_id, version, prev_version, changelog_entry, git_tag, published, created_at)`.
8. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "RELEASE"`, and the changelog entry as content.
8. **Call `save_summary`** with `milestone_id: {{milestoneId}}`, `artifact_type: "RELEASE"`, and the changelog entry as content.
### On failure

View file

@ -32,7 +32,7 @@ Consider these captures when rewriting the remaining tasks — they represent th
1. Read the blocker task summary carefully. Understand exactly what was discovered and why it blocks the current plan.
2. Analyze the remaining `[ ]` tasks in the slice plan. Determine which are still valid, which need modification, and which should be replaced.
3. **Persist replan state through `sf_replan_slice`.** Call it with: `milestoneId`, `sliceId`, `blockerTaskId`, `blockerDescription`, `whatChanged`, optional updated slice-level fields (`goal`, `successCriteria`, `proofLevel`, `integrationClosure`, `observabilityImpact`, `adversarialReview`, `planningMeeting`), `updatedTasks` (array of task objects with taskId, title, description, estimate, files, verify, inputs, expectedOutput), and `removedTaskIds` (array of task ID strings). The tool structurally enforces preservation of completed tasks, writes replan history to the DB, re-renders `{{planPath}}`, and renders `{{replanPath}}`. Preserve or update the Threat Surface and Requirement Impact sections if the replan changes the slice's security posture or requirement coverage. If the blocker changes the strongest objection, partner case, or moderator route, update the slice-level ceremony state in the same call so replanning does not leave stale review context behind.
3. **Persist replan state through `replan_slice`.** Call it with: `milestoneId`, `sliceId`, `blockerTaskId`, `blockerDescription`, `whatChanged`, optional updated slice-level fields (`goal`, `successCriteria`, `proofLevel`, `integrationClosure`, `observabilityImpact`, `adversarialReview`, `planningMeeting`), `updatedTasks` (array of task objects with taskId, title, description, estimate, files, verify, inputs, expectedOutput), and `removedTaskIds` (array of task ID strings). The tool structurally enforces preservation of completed tasks, writes replan history to the DB, re-renders `{{planPath}}`, and renders `{{replanPath}}`. Preserve or update the Threat Surface and Requirement Impact sections if the replan changes the slice's security posture or requirement coverage. If the blocker changes the strongest objection, partner case, or moderator route, update the slice-level ceremony state in the same call so replanning does not leave stale review context behind.
4. If any incomplete task had a `T0x-PLAN.md`, remove or rewrite it to match the new task description.
5. Do not commit manually — the system auto-commits your changes after this unit completes.

View file

@ -45,7 +45,7 @@ Research the codebase and relevant technologies. Narrate key findings and surpri
5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — try DeepWiki → Context7 → web search in that order. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
6. Use the **Research** output template from the inlined context above — include only sections that have real content
7. If `.sf/REQUIREMENTS.md` exists, research against it. Identify which Active requirements are table stakes, likely omissions, overbuilt risks, or domain-standard behaviors the user may or may not want.
8. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "RESEARCH"`, and the full research markdown as `content` — the tool computes the file path and persists to both DB and disk.
8. Call `save_summary` with `milestone_id: {{milestoneId}}`, `artifact_type: "RESEARCH"`, and the full research markdown as `content` — the tool computes the file path and persists to both DB and disk.
## Strategic Questions to Answer
@ -59,8 +59,8 @@ Research the codebase and relevant technologies. Narrate key findings and surpri
**Research is advisory, not auto-binding.** Surface candidate requirements clearly instead of silently expanding scope.
**You MUST call `sf_summary_save` with the research content before finishing.**
After `sf_summary_save` succeeds, do **not** call `new_milestone_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research.
**You MUST call `save_summary` with the research content before finishing.**
After `save_summary` succeeds, do **not** call `new_milestone_id`, `plan_milestone`, `plan_slice`, `plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research.
When done, say only: "Milestone {{milestoneId}} researched."

View file

@ -50,12 +50,12 @@ Research what this slice needs. Narrate key findings and surprises as you go —
4. **Documentation lookup — prefer DeepWiki first.** Use `ask_question` / `read_wiki_structure` / `read_wiki_contents` (DeepWiki) as the default for any GitHub-hosted library or framework — AI-indexed, no free-tier cap. Fall back to `resolve_library` → `get_library_docs` (Context7) for npm/pypi/crates packages DeepWiki doesn't have. **Context7 free tier is capped at 1000 requests/month — spend those on cases DeepWiki can't cover.** Skip both for libraries already used in this codebase.
5. **Web search budget:** You have a limited budget of web searches (max ~15 per session). Use them strategically — try DeepWiki → Context7 → web search in that order. Do NOT repeat the same or similar queries. If a search didn't find what you need, rephrase once or move on. Target 3-5 total web searches for a typical research unit.
6. Use the **Research** output template from the inlined context above — include only sections that have real content. The template is already inlined above; do NOT attempt to read any template file from disk (there is no `templates/SLICE-RESEARCH.md` — the correct template is already present in this prompt).
7. Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the full research markdown as `content` — the tool computes the file path and persists to both DB and disk.
7. Call `save_summary` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "RESEARCH"`, and the full research markdown as `content` — the tool computes the file path and persists to both DB and disk.
The slice directory already exists at `{{slicePath}}/`. Do NOT mkdir.
**You MUST call `sf_summary_save` with the research content before finishing.**
After `sf_summary_save` succeeds, stop immediately. Do **not** call `new_milestone_id`, `sf_plan_milestone`, `sf_plan_slice`, `sf_plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research.
**You MUST call `save_summary` with the research content before finishing.**
After `save_summary` succeeds, stop immediately. Do **not** call `new_milestone_id`, `plan_milestone`, `plan_slice`, `plan_task`, or any planning/creation tool. The orchestrator dispatches planner units after research.
### Report sf-internal observations

View file

@ -46,14 +46,14 @@ reason: "<reason>"
Remove the `{ID}-PARKED.md` file from the milestone directory to reactivate it.
### Skip a slice
Mark a slice as skipped so autonomous mode advances past it without executing. **You MUST call the `sf_skip_slice` tool** — editing the roadmap markdown alone is NOT sufficient because autonomous mode reads slice status from the database, not the roadmap file:
Mark a slice as skipped so autonomous mode advances past it without executing. **You MUST call the `skip_slice` tool** — editing the roadmap markdown alone is NOT sufficient because autonomous mode reads slice status from the database, not the roadmap file:
```
sf_skip_slice({ milestoneId: "M003", sliceId: "S02", reason: "Descoped — feature moved to M005" })
skip_slice({ milestoneId: "M003", sliceId: "S02", reason: "Descoped — feature moved to M005" })
```
Skipped slices are treated as closed by the state machine (like "complete" but distinct). Use when a slice is no longer needed or has been superseded. The slice data is preserved for reference.
**Do NOT** just check the slice checkbox in the roadmap — this does not update the DB and autonomous mode will resume the slice.
**CRITICAL — Non-bypassable gate:** Skipping a slice is a permanent DB operation. You MUST confirm with the user before calling `sf_skip_slice`. If the user does not respond or gives an ambiguous answer, you MUST re-ask — never proceed without explicit approval.
**CRITICAL — Non-bypassable gate:** Skipping a slice is a permanent DB operation. You MUST confirm with the user before calling `skip_slice`. If the user does not respond or gives an ambiguous answer, you MUST re-ask — never proceed without explicit approval.
### Discard a milestone
**Permanently** delete a milestone directory and prune it from the DB-backed
@ -62,7 +62,7 @@ queue order.
**CRITICAL — Non-bypassable gate:** Discarding is irreversible. You MUST confirm with the user before discarding. Warn explicitly if the milestone has completed work. If the user does not respond or gives an ambiguous answer, you MUST re-ask — never rationalize past the block. A missing confirmation is a "do not discard."
### Add a new milestone
Use the `new_milestone_id` tool to get the next ID, then call `sf_summary_save` with `milestone_id: {ID}`, `artifact_type: "CONTEXT"`, and the scope/goals/success criteria as `content` — the tool writes the context file to disk and persists to DB. Update the DB-backed queue order to place it at the desired position.
Use the `new_milestone_id` tool to get the next ID, then call `save_summary` with `milestone_id: {ID}`, `artifact_type: "CONTEXT"`, and the scope/goals/success criteria as `content` — the tool writes the context file to disk and persists to DB. Update the DB-backed queue order to place it at the desired position.
### Update dependencies
Edit `depends_on` in the YAML frontmatter of a milestone's `{ID}-CONTEXT.md` file. For example:

View file

@ -40,7 +40,7 @@ You are the rollback agent. A smoke test failed after deployment. Your job is to
5. **Update the deploy run** — UPDATE `deploy_runs SET status = 'rolled-back' WHERE id = '{{deployRunId}}'`.
6. **Call `sf_summary_save`** with `milestone_id: {{milestoneId}}`, `artifact_type: "ROLLBACK"`, and the rollback report as content.
6. **Call `save_summary`** with `milestone_id: {{milestoneId}}`, `artifact_type: "ROLLBACK"`, and the rollback report as content.
### After rollback

View file

@ -55,7 +55,7 @@ After running all checks, compute the **overall verdict**:
- `FAIL` — one or more automatable checks failed
- `PARTIAL` — one or more automatable checks were skipped or returned inconclusive results (not the same as `NEEDS-HUMAN` — use PARTIAL only when the agent itself could not determine pass/fail for a check it was supposed to automate)
Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "ASSESSMENT"`, and the full UAT result markdown as `content` — the tool computes the file path and persists to both DB and disk. The content should follow this format:
Call `save_summary` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "ASSESSMENT"`, and the full UAT result markdown as `content` — the tool computes the file path and persists to both DB and disk. The content should follow this format:
```markdown
---
@ -84,7 +84,7 @@ date: <ISO 8601 timestamp>
---
**You MUST call `sf_summary_save` with the UAT result content before finishing.**
**You MUST call `save_summary` with the UAT result content before finishing.**
### Report sf-internal observations

View file

@ -43,7 +43,7 @@ INSERT a row into `smoke_results`:
- `checks_json`: JSON array of `{ check, result, evidence }` objects
- `created_at` / `finished_at`
Call `sf_summary_save` with `milestone_id: {{milestoneId}}`, `artifact_type: "SMOKE"`, and the full smoke result markdown.
Call `save_summary` with `milestone_id: {{milestoneId}}`, `artifact_type: "SMOKE"`, and the full smoke result markdown.
### On failure

View file

@ -190,7 +190,7 @@ Templates showing the expected format for each artifact type are in:
- Never guess at library APIs from training data — use `get_library_docs`.
- Never ask the user to run a command, set a variable, or check something you can check yourself.
- Never await stale async jobs after editing source — `cancel_job` them first, then re-run.
- Never query `.sf/sf.db` directly via `sqlite3`, `better-sqlite3`, or `node -e require('better-sqlite3')` — the database uses a single-writer WAL connection managed by the engine. Direct access causes reader/writer conflicts and bypasses validation logic. Use `sf_milestone_status`, `sf_journal_query`, or other `sf_*` tools exclusively for all DB reads and writes.
- Never query `.sf/sf.db` directly via `sqlite3`, `better-sqlite3`, or `node -e require('better-sqlite3')` — the database uses a single-writer WAL connection managed by the engine. Direct access causes reader/writer conflicts and bypasses validation logic. Use `milestone_status`, `query_journal`, or other sf tools exclusively for all DB reads and writes.
### Ask vs infer

View file

@ -74,7 +74,7 @@ Do not use validation to smuggle every useful future test into the current miles
### Step 3 — Persist Validation
Prepare the validation content you will pass to `sf_validate_milestone`. Do **not** manually write `{{validationPath}}` — the DB-backed tool is the canonical write path and renders the validation file for you.
Prepare the validation content you will pass to `validate_milestone`. Do **not** manually write `{{validationPath}}` — the DB-backed tool is the canonical write path and renders the validation file for you.
```markdown
---
@ -101,16 +101,16 @@ reviewers: 3
<if verdict is not pass: specific actions required>
```
Call `sf_validate_milestone` with the camelCase fields `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verdictRationale`, and `remediationPlan` when needed. If you include verification-class analysis, pass it in `verificationClasses`.
Call `validate_milestone` with the camelCase fields `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verdictRationale`, and `remediationPlan` when needed. If you include verification-class analysis, pass it in `verificationClasses`.
Extract the `Verification Classes` subsection from Reviewer C and pass it verbatim in `verificationClasses` so the persisted validation output uses the canonical class names `Contract`, `Integration`, `Operational`, and `UAT`.
**DB access safety:** Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `sf_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `sf_*` tools. Direct DB access corrupts the WAL and bypasses tool-level validation.
**DB access safety:** Do NOT query `.sf/sf.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the sf tools. Direct DB access corrupts the WAL and bypasses tool-level validation.
If verdict is `needs-remediation`:
- Use `sf_reassess_roadmap` to add the remediation slices instead of editing `{{roadmapPath}}` manually
- Use `reassess_roadmap` to add the remediation slices instead of editing `{{roadmapPath}}` manually
- Those slices will be planned and executed before validation re-runs
**You MUST call `sf_validate_milestone` before finishing. Do not manually write `{{validationPath}}`.**
**You MUST call `validate_milestone` before finishing. Do not manually write `{{validationPath}}`.**
**File system safety:** When scanning milestone directories for evidence, use `ls` or `find` to list directory contents first — never pass a directory path (e.g. `tasks/`, `slices/`) directly to the `read` tool. The `read` tool only accepts file paths, not directories.

View file

@ -272,7 +272,7 @@ export function parseRoadmap(content) {
});
}
// Format B: "## Slice Overview" table format emitted by workflow-projections
// (sf_plan_milestone). Used as a fallback when format A produced nothing,
// (plan_milestone). Used as a fallback when format A produced nothing,
// so a roadmap that contains both H3 and table sections is parsed once.
if (slices.length === 0) {
const overviewBody = sections["Slice Overview"] ?? "";

View file

@ -357,7 +357,7 @@ function formatRecoveryPrompt(unitType, unitId, trace, gitChanges) {
sections.push(
"",
"### Complete-Milestone Recovery Rule",
"For milestone closeout, do not inspect git history or compute merge-base diffs on resume. Use the existing validation verdict, slice summaries, and already-run requirement updates. If validation passed and all slices are complete, call `sf_complete_milestone` with a concise summary instead of continuing branch archaeology.",
"For milestone closeout, do not inspect git history or compute merge-base diffs on resume. Use the existing validation verdict, slice summaries, and already-run requirement updates. If validation passed and all slices are complete, call `complete_milestone` with a concise summary instead of continuing branch archaeology.",
);
}
return sections.join("\n");

View file

@ -192,7 +192,7 @@ After any architecture analysis or significant decision, update:
- Architecture map (C4 Level 1-2 in text)
- Module coupling findings
**`.sf/DECISIONS.md`** (via `sf_decision_save` tool)
**`.sf/DECISIONS.md`** (via `save_decision` tool)
- All significant architectural decisions
- Automatically regenerated by the tool

View file

@ -66,7 +66,7 @@ Every change connects to a real system need. Establish:
```bash
rg -nF "<symbol or feature name>" src/ packages/
ls .sf/milestones/ 2>/dev/null
sf_milestone_status # if running inside sf
milestone_status # if running inside sf
```
Search prior memory:

View file

@ -134,7 +134,7 @@ If the `memories` table has thousands of rows:
If `.sf/DECISIONS.md` and `docs/dev/ADR-*.md` diverge:
- ADRs are the human-readable trail; DECISIONS.md is sf's tool-managed copy.
- Use `sf_decision_save` to update DECISIONS.md (it regenerates the file). Update the matching ADR by hand.
- Use `save_decision` to update DECISIONS.md (it regenerates the file). Update the matching ADR by hand.
- If they're already drifted, write a synthesis ADR pointing at both and supersede the older entries.
## Rules

View file

@ -22,7 +22,7 @@ This skill does NOT decide whether a slice is shippable in isolation — that's
- After `spec-first-tdd` reaches GREEN on the contract test.
- After a debugging fix from `systematic-debugging`.
- Before calling `sf_task_complete` or `sf_slice_complete`.
- Before calling `complete_task` or `complete_slice`.
- Inside an autonomous iteration loop, between slices.
If used inside an autonomous iteration loop and the user goal is still in progress:

View file

@ -81,7 +81,7 @@ wait for reset or continue with local evidence.
**SF project state queries:**
Use the runtime query tools instead of opening `.sf/sf.db` directly:
- `sf_milestone_status` — read milestone, slice, and task status inside an agent session.
- `milestone_status` — read milestone, slice, and task status inside an agent session.
- `sf headless query` — get the full DB-backed project snapshot when running from the shell.
- `/inspect db` — inspect schema/version diagnostics when the user asks for database health.
@ -139,7 +139,7 @@ The SF database contains the canonical project state, but agents should inspect
# Full state snapshot from shell
sf headless query
# In an agent session, call the DB-backed `sf_milestone_status` tool
# In an agent session, call the DB-backed `milestone_status` tool
# with milestoneId=M001 when you need a focused milestone snapshot.
```

View file

@ -47,7 +47,7 @@ phases:
# summaries as context. When false (default), all slices get full plans up
# front. (Note: SF's local ADR-011 is "Swarm Chat" — unrelated.)
progressive_planning:
# SF ADR-011 P2: mid-execution escalation. When true, sf_task_complete honors
# SF ADR-011 P2: mid-execution escalation. When true, complete_task honors
# an optional escalation: { question, options, recommendation, ... } payload.
# The agent's choice carries forward as a hard constraint into the next
# executor. See escalation_auto_accept for whether autonomous mode pauses or

View file

@ -108,7 +108,7 @@ describe("complete-slice prompt", () => {
"| T01 | done | .sf/milestones/M900/slices/S01/tasks/T01-SUMMARY.md | Schedule docs exist. | passed | docs/specs/sf-schedule.md |",
);
expect(prompt).toContain(
"verify the slice-level contract once and call `sf_slice_complete`",
"verify the slice-level contract once and call `complete_slice`",
);
expect(prompt).toContain("Do not reopen planning");
});

View file

@ -1,5 +1,5 @@
/**
* complete-milestone handler the core operation behind sf_complete_milestone.
* complete-milestone handler the core operation behind complete_milestone.
*
* Validates all slices are complete, updates milestone status in DB,
* renders MILESTONE-SUMMARY.md to disk, stores rendered markdown in DB

View file

@ -1,5 +1,5 @@
/**
* complete-slice handler the core operation behind sf_slice_complete.
* complete-slice handler the core operation behind complete_slice.
*
* Validates inputs, checks all tasks are complete, atomically renders
* SUMMARY.md + UAT.md to disk, then writes the slice row to DB in a
@ -532,7 +532,7 @@ export async function handleCompleteSlice(paramsInput, basePath) {
);
invalidateStateCache();
return {
error: `database update failed after SUMMARY.md/UAT.md write succeeded at ${summaryPath}: ${msg}. Files were kept; retry sf_slice_complete after fixing the DB.`,
error: `database update failed after SUMMARY.md/UAT.md write succeeded at ${summaryPath}: ${msg}. Files were kept; retry complete_slice after fixing the DB.`,
};
}
// Toggle roadmap checkbox via renderer module after DB status is updated.

View file

@ -1,5 +1,5 @@
/**
* complete-task handler the core operation behind sf_task_complete.
* complete-task handler the core operation behind complete_task.
*
* Validates inputs, atomically renders SUMMARY.md to disk, then writes the
* task row to DB in a transaction, toggles the plan checkbox, and invalidates
@ -450,7 +450,7 @@ export async function handleCompleteTask(paramsInput, basePath) {
);
invalidateStateCache();
return {
error: `database update failed after SUMMARY.md write succeeded at ${summaryPath}: ${msg}. SUMMARY.md was kept; retry sf_task_complete after fixing the DB.`,
error: `database update failed after SUMMARY.md write succeeded at ${summaryPath}: ${msg}. SUMMARY.md was kept; retry complete_task after fixing the DB.`,
};
}
// Toggle plan checkbox via renderer module after DB status is updated.

View file

@ -399,7 +399,7 @@ export async function handlePlanMilestone(rawParams, basePath) {
(s) => !incomingSliceIds.has(s.id),
);
if (droppedCompleted.length > 0) {
guardError = `cannot re-plan milestone ${params.milestoneId}: ${droppedCompleted.length} completed slice(s) would be dropped (${droppedCompleted.map((s) => s.id).join(", ")}). Use sf_reassess_roadmap to modify the roadmap.`;
guardError = `cannot re-plan milestone ${params.milestoneId}: ${droppedCompleted.length} completed slice(s) would be dropped (${droppedCompleted.map((s) => s.id).join(", ")}). Use reassess_roadmap to modify the roadmap.`;
return;
}
}

View file

@ -285,7 +285,7 @@ export async function handlePlanSlice(rawParams, basePath) {
adversarialReview: params.adversarialReview,
planningMeeting: params.planningMeeting,
});
// SF ADR-011: when sf_plan_slice runs against a sketch slice (refine-slice
// SF ADR-011: when plan_slice runs against a sketch slice (refine-slice
// produced a full plan from the sketch_scope hint), clear the is_sketch
// flag atomically with the plan write so the next dispatch cycle no
// longer routes to refine. Idempotent — no-op for non-sketches.

View file

@ -1,7 +1,7 @@
// SF — Product Completeness Audit tool
//
// Slim implementation of the milestone-end product-audit workflow phase.
// The tool name is `sf_product_audit`. It validates a structured audit
// The tool name is `audit_product`. It validates a structured audit
// payload (verdict + gaps) and writes the result to:
// .sf/active/{milestoneId}/PRODUCT-AUDIT.json
// .sf/active/{milestoneId}/PRODUCT-AUDIT.md

View file

@ -1,5 +1,5 @@
/**
* skip-slice handler the core operation behind sf_skip_slice.
* skip-slice handler the core operation behind skip_slice.
*
* Marks a slice as skipped and cascades the skip to every non-closed task in
* that slice. Without the task cascade the deep-check in

View file

@ -1,5 +1,5 @@
/**
* validate-milestone handler the core operation behind sf_validate_milestone.
* validate-milestone handler the core operation behind validate_milestone.
*
* Persists milestone validation results to the assessments table and
* quality_gates table, renders VALIDATION.md to disk, and invalidates caches.

View file

@ -163,8 +163,8 @@ export async function executeSummarySave(params, basePath = process.cwd()) {
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `sf_summary_save tool failed: ${msg}`, {
tool: "sf_summary_save",
logError("tool", `save_summary tool failed: ${msg}`, {
tool: "save_summary",
error: String(err),
});
return {
@ -254,8 +254,8 @@ export async function executeTaskComplete(params, basePath = process.cwd()) {
escalationError = err instanceof Error ? err.message : String(err);
logError(
"tool",
`sf_task_complete escalation write failed: ${escalationError}`,
{ tool: "sf_task_complete", op: "escalation" },
`complete_task escalation write failed: ${escalationError}`,
{ tool: "complete_task", op: "escalation" },
);
}
}
@ -287,7 +287,7 @@ export async function executeTaskComplete(params, basePath = process.cwd()) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `complete_task tool failed: ${msg}`, {
tool: "sf_task_complete",
tool: "complete_task",
error: String(err),
});
return {
@ -385,7 +385,7 @@ export async function executeSliceComplete(params, basePath = process.cwd()) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `complete_slice tool failed: ${msg}`, {
tool: "sf_slice_complete",
tool: "complete_slice",
error: String(err),
});
return {
@ -440,7 +440,7 @@ export async function executeCompleteMilestone(
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `complete_milestone tool failed: ${msg}`, {
tool: "sf_complete_milestone",
tool: "complete_milestone",
error: String(err),
});
return {
@ -495,7 +495,7 @@ export async function executeValidateMilestone(
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `validate_milestone tool failed: ${msg}`, {
tool: "sf_validate_milestone",
tool: "validate_milestone",
error: String(err),
});
return {
@ -548,7 +548,7 @@ export async function executeReassessRoadmap(params, basePath = process.cwd()) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `reassess_roadmap tool failed: ${msg}`, {
tool: "sf_reassess_roadmap",
tool: "reassess_roadmap",
error: String(err),
});
return {
@ -693,7 +693,7 @@ export async function executePlanMilestone(params, basePath = process.cwd()) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `plan_milestone tool failed: ${msg}`, {
tool: "sf_plan_milestone",
tool: "plan_milestone",
error: String(err),
});
return {
@ -746,7 +746,7 @@ export async function executePlanSlice(params, basePath = process.cwd()) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `plan_slice tool failed: ${msg}`, {
tool: "sf_plan_slice",
tool: "plan_slice",
error: String(err),
});
return {
@ -799,7 +799,7 @@ export async function executeReplanSlice(params, basePath = process.cwd()) {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `replan_slice tool failed: ${msg}`, {
tool: "sf_replan_slice",
tool: "replan_slice",
error: String(err),
});
return {
@ -864,7 +864,7 @@ export async function executeMilestoneStatus(params, basePath = process.cwd()) {
});
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logWarning("tool", `sf_milestone_status tool failed: ${msg}`);
logWarning("tool", `milestone_status tool failed: ${msg}`);
return {
content: [
{ type: "text", text: `Error querying milestone status: ${msg}` },

View file

@ -15,7 +15,7 @@ hard-block milestone completion — actionable gaps become follow-up slices.
</purpose>
<phases>
1. audit — Inspect code/tests/docs, score gaps, call sf_product_audit
1. audit — Inspect code/tests/docs, score gaps, call audit_product
</phases>
<process>
@ -31,7 +31,7 @@ hard-block milestone completion — actionable gaps become follow-up slices.
to confirm declared capabilities have concrete evidence (code, tests,
deployment artifacts, runbooks).
4. Score each gap with severity, confidence, and a suggested follow-up slice.
5. Call `sf_product_audit` exactly once with the structured payload. The tool
5. Call `audit_product` exactly once with the structured payload. The tool
writes:
- `.sf/active/{milestoneId}/PRODUCT-AUDIT.json` (machine-readable)
- `.sf/active/{milestoneId}/PRODUCT-AUDIT.md` (human-readable)

View file

@ -9,20 +9,20 @@
export function getRequiredWorkflowToolsForGuidedUnit(unitType) {
switch (unitType) {
case "discuss-milestone":
return ["sf_summary_save", "sf_plan_milestone"];
return ["save_summary", "plan_milestone"];
case "discuss-slice":
return ["sf_summary_save"];
return ["save_summary"];
case "research-milestone":
case "research-slice":
return ["sf_summary_save"];
return ["save_summary"];
case "plan-milestone":
return ["sf_plan_milestone"];
return ["plan_milestone"];
case "plan-slice":
return ["sf_plan_slice"];
return ["plan_slice"];
case "execute-task":
return ["sf_task_complete"];
return ["complete_task"];
case "complete-slice":
return ["sf_slice_complete"];
return ["complete_slice"];
default:
return [];
}
@ -30,31 +30,31 @@ export function getRequiredWorkflowToolsForGuidedUnit(unitType) {
export function getRequiredWorkflowToolsForAutoUnit(unitType) {
switch (unitType) {
case "discuss-milestone":
return ["sf_summary_save", "sf_plan_milestone"];
return ["save_summary", "plan_milestone"];
case "research-milestone":
case "research-slice":
case "run-uat":
return ["sf_summary_save"];
return ["save_summary"];
case "plan-milestone":
return ["sf_plan_milestone"];
return ["plan_milestone"];
case "plan-slice":
return ["sf_plan_slice"];
return ["plan_slice"];
case "execute-task":
case "execute-task-simple":
case "reactive-execute":
return ["sf_task_complete"];
return ["complete_task"];
case "complete-slice":
return ["sf_slice_complete"];
return ["complete_slice"];
case "replan-slice":
return ["sf_replan_slice"];
return ["replan_slice"];
case "reassess-roadmap":
return ["sf_milestone_status", "sf_reassess_roadmap"];
return ["milestone_status", "reassess_roadmap"];
case "gate-evaluate":
return ["record_gate"];
case "validate-milestone":
return ["sf_milestone_status", "sf_validate_milestone"];
return ["milestone_status", "validate_milestone"];
case "complete-milestone":
return ["sf_milestone_status", "sf_complete_milestone"];
return ["milestone_status", "complete_milestone"];
default:
return [];
}

View file

@ -87,9 +87,9 @@ function matchesBlockedPattern(path) {
* Directs the agent to use engine tool calls instead.
*/
export const BLOCKED_WRITE_ERROR = `Direct writes to .sf/STATE.md and .sf/sf.db are blocked. Use engine tool calls instead:
- To complete a task: call sf_task_complete(milestone_id, slice_id, task_id, summary)
- To complete a slice: call sf_slice_complete(milestone_id, slice_id, summary, uat_result)
- To save a decision: call sf_decision_save(scope, decision, choice, rationale)
- To complete a task: call complete_task(milestone_id, slice_id, task_id, summary)
- To complete a slice: call complete_slice(milestone_id, slice_id, summary, uat_result)
- To save a decision: call save_decision(scope, decision, choice, rationale)
- To start a task: call sf_start_task(milestone_id, slice_id, task_id)
- To record verification: call sf_record_verification(milestone_id, slice_id, task_id, evidence)
- To report a blocker: call sf_report_blocker(milestone_id, slice_id, task_id, description)`;