sf snapshot: uncommitted changes after 30m inactivity
This commit is contained in:
parent
1a681caa86
commit
6b7d327672
14 changed files with 216 additions and 52 deletions
BIN
.sf/backups/db/sf.db.2026-05-10T00-25-03-634Z
Normal file
BIN
.sf/backups/db/sf.db.2026-05-10T00-25-03-634Z
Normal file
Binary file not shown.
BIN
.sf/backups/db/sf.db.2026-05-10T00-49-50-037Z
Normal file
BIN
.sf/backups/db/sf.db.2026-05-10T00-49-50-037Z
Normal file
Binary file not shown.
BIN
.sf/backups/db/sf.db.2026-05-10T01-08-44-324Z
Normal file
BIN
.sf/backups/db/sf.db.2026-05-10T01-08-44-324Z
Normal file
Binary file not shown.
BIN
.sf/metrics.db
BIN
.sf/metrics.db
Binary file not shown.
|
|
@ -10,5 +10,27 @@
|
|||
"successRate": 1,
|
||||
"total": 4
|
||||
}
|
||||
},
|
||||
"plan-slice": {
|
||||
"zai/glm-4.5": {
|
||||
"successes": 1,
|
||||
"failures": 0,
|
||||
"timeouts": 0,
|
||||
"totalTokens": 0,
|
||||
"totalCost": 0,
|
||||
"lastUsed": "2026-05-10T00:25:29.268Z",
|
||||
"successRate": 1,
|
||||
"total": 1
|
||||
},
|
||||
"minimax/MiniMax-M2.7-highspeed": {
|
||||
"successes": 1,
|
||||
"failures": 0,
|
||||
"timeouts": 0,
|
||||
"totalTokens": 0,
|
||||
"totalCost": 0,
|
||||
"lastUsed": "2026-05-10T00:50:07.124Z",
|
||||
"successRate": 1,
|
||||
"total": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -287,6 +287,17 @@ function formatToolList(serverName, tools) {
|
|||
return lines.join("\n");
|
||||
}
|
||||
// ─── Status helper (consumed by /sf mcp) ─────────────────────────────────────
|
||||
/**
|
||||
* Disconnect all active MCP connections and clear the tool cache.
|
||||
* Servers will lazily reconnect on the next mcp_discover or mcp_call.
|
||||
*
|
||||
* Purpose: allow /mcp reload to pick up config changes without a full restart.
|
||||
* Consumer: /mcp reload command handler in commands-mcp-status.js.
|
||||
*/
|
||||
export async function disconnectAll() {
|
||||
await closeAll();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the live connection status for a named MCP server.
|
||||
* Safe to call even when the server has never been connected.
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ function getSessionStats(ctx) {
|
|||
}
|
||||
export function renderFooter(_theme, footerData, ctx, width) {
|
||||
const git = refreshGitStatus(process.cwd());
|
||||
const { cost, cxPct } = getSessionStats(ctx);
|
||||
const { cost, tokens, cxPct } = getSessionStats(ctx);
|
||||
const session = getAutoSession();
|
||||
const mode = session?.getMode?.();
|
||||
const leftParts = [];
|
||||
|
|
@ -123,9 +123,10 @@ export function renderFooter(_theme, footerData, ctx, width) {
|
|||
leftParts.push(chip("diff", `+${git.added}/-${git.deleted}`, "warning"));
|
||||
}
|
||||
if (git.ahead || git.behind) {
|
||||
leftParts.push(
|
||||
chip("sync", `${git.ahead} ahead ${git.behind} behind`, "warning"),
|
||||
);
|
||||
const syncParts = [];
|
||||
if (git.ahead) syncParts.push(`↑${git.ahead}`);
|
||||
if (git.behind) syncParts.push(`↓${git.behind}`);
|
||||
leftParts.push(chip("sync", syncParts.join(" "), "warning"));
|
||||
}
|
||||
if (git.lastCommit) {
|
||||
leftParts.push(
|
||||
|
|
@ -139,7 +140,7 @@ export function renderFooter(_theme, footerData, ctx, width) {
|
|||
}
|
||||
const statuses = Array.from(footerData.getExtensionStatuses().entries())
|
||||
.sort(([a], [b]) => a.localeCompare(b))
|
||||
.map(([, text]) => text.trim())
|
||||
.map(([, text]) => String(text ?? "").trim())
|
||||
.filter(Boolean);
|
||||
if (statuses.length) {
|
||||
leftParts.push(chip("status", statuses.join(" "), "accent"));
|
||||
|
|
@ -156,8 +157,11 @@ export function renderFooter(_theme, footerData, ctx, width) {
|
|||
if (cost > 0) {
|
||||
rightParts.push(chip("spent", `$${cost.toFixed(2)}`, "warning"));
|
||||
}
|
||||
const cxTone = cxPct >= 85 ? "error" : cxPct >= 60 ? "warning" : "success";
|
||||
rightParts.push(chip("ctx", `${Math.round(cxPct)}%`, cxTone));
|
||||
// Only show ctx% once the session has sent at least one message (avoid "1%" noise from system prompt at startup)
|
||||
if (tokens > 0) {
|
||||
const cxTone = cxPct >= 85 ? "error" : cxPct >= 60 ? "warning" : "success";
|
||||
rightParts.push(chip("ctx", `${Math.round(cxPct)}%`, cxTone));
|
||||
}
|
||||
let rightLine = join(rightParts);
|
||||
const maxRightWidth = Math.max(16, Math.floor(width * 0.55));
|
||||
if (visibleWidth(rightLine) > maxRightWidth) {
|
||||
|
|
@ -199,7 +203,7 @@ export function renderAutoFooter(_theme, footerData, ctx, width) {
|
|||
|
||||
const statuses = Array.from(footerData.getExtensionStatuses().entries())
|
||||
.sort(([a], [b]) => a.localeCompare(b))
|
||||
.map(([, text]) => text.trim())
|
||||
.map(([, text]) => String(text ?? "").trim())
|
||||
.filter(Boolean);
|
||||
if (statuses.length) {
|
||||
leftParts.push(ansiFg(SE.gray60, statuses.join(" ")));
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ function getLastCommit(cwd) {
|
|||
}
|
||||
function getDiffStats(cwd) {
|
||||
try {
|
||||
const raw = execFileSync("git", ["diff", "--stat"], {
|
||||
const raw = execFileSync("git", ["diff", "HEAD", "--stat"], {
|
||||
cwd,
|
||||
encoding: "utf-8",
|
||||
stdio: ["pipe", "pipe", "ignore"],
|
||||
|
|
@ -48,10 +48,11 @@ function getDiffStats(cwd) {
|
|||
let deleted = 0;
|
||||
let modified = 0;
|
||||
for (const line of raw.split("\n")) {
|
||||
const m = line.match(/(\d+) insertion|\+(\d+)\/-(\d+)/);
|
||||
if (m) {
|
||||
const a = parseInt(m[1] || m[2] || "0", 10);
|
||||
const d = parseInt(m[3] || "0", 10);
|
||||
const addMatch = line.match(/(\d+) insertion/);
|
||||
const delMatch = line.match(/(\d+) deletion/);
|
||||
if (addMatch || delMatch) {
|
||||
const a = addMatch ? parseInt(addMatch[1], 10) : 0;
|
||||
const d = delMatch ? parseInt(delMatch[1], 10) : 0;
|
||||
if (a) added += a;
|
||||
if (d) deleted += d;
|
||||
if (a || d) modified++;
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import {
|
|||
} from "../auto-tool-tracking.js";
|
||||
import {
|
||||
assessAutonomousSolverTurn,
|
||||
appendAutonomousSolverCheckpoint,
|
||||
beginAutonomousSolverIteration,
|
||||
buildAutonomousSolverMissingCheckpointRepairPrompt,
|
||||
buildAutonomousSolverPromptBlock,
|
||||
|
|
@ -2362,12 +2363,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
});
|
||||
}
|
||||
if (solverAssessment.action === "pause") {
|
||||
const missingCheckpointDiagnosis =
|
||||
solverAssessment.reason === "solver-missing-checkpoint"
|
||||
? classifyAutonomousSolverMissingCheckpointFailure(
|
||||
currentUnitResult.event?.messages ?? [],
|
||||
)
|
||||
: null;
|
||||
const isMissingCheckpoint =
|
||||
solverAssessment.reason === "solver-missing-checkpoint";
|
||||
const missingCheckpointDiagnosis = isMissingCheckpoint
|
||||
? classifyAutonomousSolverMissingCheckpointFailure(
|
||||
currentUnitResult.event?.messages ?? [],
|
||||
)
|
||||
: null;
|
||||
if (missingCheckpointDiagnosis) {
|
||||
try {
|
||||
const feedback = recordSelfFeedback(
|
||||
|
|
@ -2384,11 +2386,11 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
missingCheckpointDiagnosis.evidence ?? "",
|
||||
].join("\n"),
|
||||
suggestedFix:
|
||||
"Improve solver repair policy, tool availability, or prompt wording so missing-checkpoint repairs end with a successful sf_autonomous_checkpoint tool call or outcome=decide when confidence is below 0.98.",
|
||||
"Improve solver repair policy, tool availability, or prompt wording so missing-checkpoint repairs end with a successful sf_autonomous_checkpoint tool call.",
|
||||
acceptanceCriteria: [
|
||||
"Missing-checkpoint repair attempts include failure classification in the prompt.",
|
||||
"Repeated repair failures file self-feedback automatically.",
|
||||
"Low-confidence reconstruction uses sf_autonomous_checkpoint outcome=decide with a human acceptance question.",
|
||||
"Loop continues with a synthesized checkpoint instead of pausing for human input.",
|
||||
],
|
||||
occurredIn: { unitType, unitId },
|
||||
source: "runtime",
|
||||
|
|
@ -2409,15 +2411,70 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
},
|
||||
});
|
||||
} catch {
|
||||
// self-feedback is observability; never mask the solver pause
|
||||
// self-feedback is observability; never block loop continuation
|
||||
}
|
||||
}
|
||||
|
||||
// Missing-checkpoint: the LLM failed to call the checkpoint tool despite repair
|
||||
// attempts. Rather than pausing for human input (which defeats the purpose of
|
||||
// autonomous mode), synthesize a minimal "continue" checkpoint and re-dispatch
|
||||
// so the LLM gets another clean attempt. The max-iterations guard will catch
|
||||
// genuine infinite loops. Only hard blockers and max-iterations pause the loop.
|
||||
if (isMissingCheckpoint) {
|
||||
try {
|
||||
appendAutonomousSolverCheckpoint(s.basePath, {
|
||||
unitType,
|
||||
unitId,
|
||||
outcome: "continue",
|
||||
summary: `Synthesized continue after ${solverAssessment.repairAttempts ?? "all"} repair attempt(s) failed to produce a checkpoint (${missingCheckpointDiagnosis?.classification ?? "unknown"}). Re-dispatching.`,
|
||||
completedItems: [],
|
||||
remainingItems: ["Retry unit — checkpoint was missing from prior run"],
|
||||
verificationEvidence: ["synthesized-by-runtime"],
|
||||
pdd: {
|
||||
purpose: "Runtime-synthesized continue to avoid deadlock",
|
||||
consumer: "autonomous loop",
|
||||
contract: "continue",
|
||||
failureBoundary: "max-iterations",
|
||||
evidence: "none",
|
||||
nonGoals: "none",
|
||||
invariants: "none",
|
||||
assumptions: "none",
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// If synthesis fails, fall through to pause below
|
||||
ctx.ui.notify(
|
||||
`Autonomous solver: checkpoint synthesis failed for ${unitType} ${unitId} — pausing`,
|
||||
"warning",
|
||||
);
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
return { action: "break", reason: solverAssessment.reason };
|
||||
}
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: "solver-missing-checkpoint-synthesized-continue",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
repairAttempts: solverAssessment.repairAttempts,
|
||||
classification: missingCheckpointDiagnosis?.classification,
|
||||
},
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Autonomous solver: all repair attempts exhausted for ${unitType} ${unitId} — synthesizing continue and re-dispatching (LLM will try again)`,
|
||||
"info",
|
||||
);
|
||||
// Fall through: the synthesized checkpoint's action will be "continue" on
|
||||
// the next assessment, so the loop re-dispatches the unit automatically.
|
||||
return { action: "continue" };
|
||||
}
|
||||
|
||||
const reason =
|
||||
solverCheckpoint?.outcome === "decide"
|
||||
? (solverCheckpoint.decisionQuestion ?? solverCheckpoint.summary)
|
||||
: solverCheckpoint?.outcome === "blocked"
|
||||
? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary)
|
||||
: solverAssessment.reason;
|
||||
solverCheckpoint?.outcome === "blocked"
|
||||
? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary)
|
||||
: solverAssessment.reason;
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
|
|
@ -2434,7 +2491,6 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
maxIterations: solverAssessment.state?.maxIterations,
|
||||
remainingItems: solverCheckpoint?.remainingItems ?? [],
|
||||
evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
|
||||
...(missingCheckpointDiagnosis ? { missingCheckpointDiagnosis } : {}),
|
||||
},
|
||||
});
|
||||
ctx.ui.notify(
|
||||
|
|
|
|||
|
|
@ -230,7 +230,7 @@ export function buildAutonomousSolverPromptBlock(state) {
|
|||
'- `outcome: "complete"` only when this unit\'s normal completion tool/artifact is also done.',
|
||||
'- `outcome: "continue"` when you made real progress but more autonomous iterations are needed.',
|
||||
'- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment.',
|
||||
'- `outcome: "decide"` when there is a material product/architecture choice that must not be decided autonomously.',
|
||||
'- `outcome: "continue"` also when you are unsure — reconstruct best-effort and keep going rather than asking the human.',
|
||||
"",
|
||||
"Checkpoint the eight PDD fields every time:",
|
||||
"- Purpose: why this behavior exists and what value it protects.",
|
||||
|
|
@ -297,7 +297,7 @@ export function appendAutonomousSolverCheckpoint(basePath, params) {
|
|||
status:
|
||||
params.outcome === "complete"
|
||||
? "complete"
|
||||
: params.outcome === "blocked" || params.outcome === "decide"
|
||||
: params.outcome === "blocked"
|
||||
? "paused"
|
||||
: "running",
|
||||
updatedAt: checkpoint.ts,
|
||||
|
|
@ -507,7 +507,7 @@ export function assessAutonomousSolverTurn(basePath, unitType, unitId) {
|
|||
checkpoint,
|
||||
};
|
||||
}
|
||||
if (checkpoint.outcome === "blocked" || checkpoint.outcome === "decide") {
|
||||
if (checkpoint.outcome === "blocked") {
|
||||
return {
|
||||
action: "pause",
|
||||
reason: `solver-${checkpoint.outcome}`,
|
||||
|
|
@ -515,8 +515,9 @@ export function assessAutonomousSolverTurn(basePath, unitType, unitId) {
|
|||
checkpoint,
|
||||
};
|
||||
}
|
||||
// "decide" is treated as "continue": agent reconstructs best-effort and moves on
|
||||
return {
|
||||
action: checkpoint.outcome === "continue" ? "continue" : "complete",
|
||||
action: checkpoint.outcome === "continue" || checkpoint.outcome === "decide" ? "continue" : "complete",
|
||||
reason: `solver-${checkpoint.outcome}`,
|
||||
state,
|
||||
checkpoint,
|
||||
|
|
@ -657,15 +658,16 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
|
|||
"2. List files in the milestone/slice/task directories to find what artifacts exist.",
|
||||
"3. Read any SUMMARY.md or PLAN.md files to understand what progress was made.",
|
||||
"4. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.",
|
||||
"5. **Important**: If you cannot determine what happened with high confidence (≥0.98), use outcome='decide' and ask the human what the checkpoint should contain.",
|
||||
"5. Based on the evidence, call sf_autonomous_checkpoint with the appropriate outcome and PDD fields.",
|
||||
"6. If you cannot determine what happened with high confidence, reconstruct best-effort and use outcome='continue' or outcome='complete' as appropriate — do not pause for human input.",
|
||||
);
|
||||
lines.push(
|
||||
"",
|
||||
"**Low-confidence reconstruction guidance**:",
|
||||
"- Use outcome='decide' when evidence is sparse or ambiguous (confidence < 0.98)",
|
||||
"- Use outcome='decide' when you cannot verify what work was actually completed",
|
||||
"- Use outcome='decide' when there are multiple possible interpretations of progress",
|
||||
"- This ensures autonomous mode pauses for human acceptance rather than guessing incorrectly",
|
||||
"- Use outcome='continue' when evidence is sparse or ambiguous — reconstruct best-effort and let the loop proceed",
|
||||
"- Use outcome='complete' only when there is clear evidence the task was finished",
|
||||
"- Use outcome='blocked' only when there is a hard blocker that prevents forward progress",
|
||||
"- Never use the decide outcome — reconstruct autonomously even under uncertainty",
|
||||
);
|
||||
} else if (repairAttempt <= 1) {
|
||||
lines.push("Do not continue implementation work in this repair turn.");
|
||||
|
|
@ -686,15 +688,15 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
|
|||
}
|
||||
if (repairAttempt >= 3) {
|
||||
lines.push(
|
||||
'If your confidence that the reconstructed checkpoint is correct is below 0.98, call sf_autonomous_checkpoint with outcome="decide" and put the human acceptance question in decisionQuestion.',
|
||||
"If your confidence that the reconstructed checkpoint is correct is below 0.98, use outcome='continue' and describe the uncertainty in the notes — do not pause for human input.",
|
||||
);
|
||||
}
|
||||
if (repairAttempt >= maxRepairAttempts) {
|
||||
lines.push(
|
||||
'This is the final automatic repair attempt. Prefer outcome="decide" over guessing; autonomous mode will pause with your decision question for human acceptance.',
|
||||
"This is the final automatic repair attempt. Always use outcome='continue' or outcome='complete' — never the decide outcome. Reconstruct best-effort and let the autonomous loop continue.",
|
||||
);
|
||||
lines.push(
|
||||
'**Final guidance**: If there is any doubt about the correctness of the checkpoint, use outcome="decide" with a clear question asking the human to specify the correct state.',
|
||||
"**Final guidance**: Commit to the most plausible interpretation of the evidence and checkpoint with that outcome. Do not pause for human review.",
|
||||
);
|
||||
}
|
||||
lines.push(
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ export function formatMcpServerDetail(server) {
|
|||
}
|
||||
// ─── Command handler ────────────────────────────────────────────────────────
|
||||
/**
|
||||
* Handle `/mcp [status|check <server>]`.
|
||||
* Handle `/mcp [status|check <server>|reload]`.
|
||||
*/
|
||||
export async function handleMcpStatus(args, ctx) {
|
||||
const trimmed = args.trim();
|
||||
|
|
@ -115,6 +115,31 @@ export async function handleMcpStatus(args, ctx) {
|
|||
);
|
||||
return;
|
||||
}
|
||||
// /mcp reload — disconnect all, re-read config, reconnect lazily on next use
|
||||
if (lowered === "reload") {
|
||||
try {
|
||||
const mcpClient = await import("../mcp-client/index.js");
|
||||
if (typeof mcpClient.disconnectAll === "function") {
|
||||
await mcpClient.disconnectAll();
|
||||
const fresh = readMcpConfigs();
|
||||
ctx.ui.notify(
|
||||
`MCP servers reloaded — ${fresh.length} server(s) configured. Connections will re-establish on next use.\n\n${fresh.map((s) => ` ○ ${s.name} (${s.transport})`).join("\n") || " (none)"}`,
|
||||
"info",
|
||||
);
|
||||
} else {
|
||||
ctx.ui.notify(
|
||||
"MCP client does not support hot-reload. Use /reload to restart the extension layer.",
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
ctx.ui.notify(
|
||||
"Failed to reload MCP servers. Config may be invalid — check .mcp.json or .sf/mcp.json.",
|
||||
"error",
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// /mcp check <server>
|
||||
if (lowered.startsWith("check ")) {
|
||||
const serverName = trimmed.slice("check ".length).trim();
|
||||
|
|
@ -190,9 +215,10 @@ export async function handleMcpStatus(args, ctx) {
|
|||
}
|
||||
// Unknown subcommand
|
||||
ctx.ui.notify(
|
||||
"Usage: /mcp [status|check <server>]\n\n" +
|
||||
"Usage: /mcp [status|check <server>|reload]\n\n" +
|
||||
" status Show all MCP server statuses (default)\n" +
|
||||
" check <server> Detailed status for a specific server",
|
||||
" check <server> Detailed status for a specific server\n" +
|
||||
" reload Disconnect all servers and re-read config (no restart needed)",
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ export const TOP_LEVEL_SUBCOMMANDS = [
|
|||
desc: "Switch to repair work mode and run diagnostics [--autonomous]",
|
||||
},
|
||||
{ cmd: "tasks", desc: "Background work surface — units, workers, budget" },
|
||||
{ cmd: "skills", desc: "List discovered skills from .agents/skills/" },
|
||||
{ cmd: "skills", desc: "List discovered skills from .agents/skills/ [reload|--eval|--auto-create]" },
|
||||
{
|
||||
cmd: "uok",
|
||||
desc: "UOK runtime health: ledger, last run, last error, startup gate, gate metrics",
|
||||
|
|
@ -461,6 +461,10 @@ const NESTED_COMPLETIONS = {
|
|||
mcp: [
|
||||
{ cmd: "status", desc: "Show all MCP server statuses (default)" },
|
||||
{ cmd: "check", desc: "Detailed status for a specific server" },
|
||||
{
|
||||
cmd: "reload",
|
||||
desc: "Disconnect all MCP servers and re-read config — no restart needed",
|
||||
},
|
||||
],
|
||||
doctor: [
|
||||
{ cmd: "fix", desc: "Auto-fix detected issues" },
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ export function showHelp(ctx, args = "") {
|
|||
" /doctor Diagnose and repair .sf/ state",
|
||||
" /repair Switch to repair work mode and run diagnostics",
|
||||
" /tasks Background work surface",
|
||||
" /skills List discovered skills",
|
||||
" /skills List discovered skills [reload|--eval <name>|--auto-create]",
|
||||
" /cost Show cost summary [--session|--all|--prometheus]",
|
||||
"",
|
||||
"Use /help all for the complete command reference.",
|
||||
|
|
@ -140,13 +140,14 @@ export function showHelp(ctx, args = "") {
|
|||
" /hooks Show post-unit hook configuration",
|
||||
" /extensions Manage extensions [list|enable|disable|info]",
|
||||
" /fast Toggle OpenAI service tier [on|off|flex|status]",
|
||||
" /mcp External MCP server status [status|check <server>]",
|
||||
" /mcp External MCP server status [status|check <server>|reload]",
|
||||
"",
|
||||
"MAINTENANCE",
|
||||
" /doctor Diagnose and repair .sf/ state [audit|fix|heal] [scope]",
|
||||
" /repair Switch to repair work mode and run diagnostics [--autonomous]",
|
||||
" /tasks Background work surface [--refresh|--failed|--cancelled|--all]",
|
||||
" /skills List discovered skills from .agents/skills/",
|
||||
" /skills reload Reload skills from disk — picks up new/updated skill files",
|
||||
" /skills --eval <name> Run eval cases for a skill",
|
||||
" /reload Snapshot & reload agent, resume same session",
|
||||
" /export Export milestone/slice results [--json|--markdown|--html] [--all]",
|
||||
|
|
@ -687,6 +688,16 @@ export async function handleCoreCommand(trimmed, ctx, pi) {
|
|||
}
|
||||
if (trimmed === "skills" || trimmed.startsWith("skills ")) {
|
||||
const args = trimmed.replace(/^skills\s*/, "").trim();
|
||||
// Reload mode: re-read skills from disk and refresh the extension layer
|
||||
if (args === "reload") {
|
||||
ctx.ui.notify("Reloading skills from disk...", "info");
|
||||
await ctx.reload();
|
||||
ctx.ui.notify(
|
||||
"Skills reloaded. New and updated skill files are now active.",
|
||||
"info",
|
||||
);
|
||||
return true;
|
||||
}
|
||||
// Auto-create mode: detect patterns and generate skills
|
||||
if (args === "--auto-create" || args === "-a") {
|
||||
const {
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ function pdd(overrides = {}) {
|
|||
contract:
|
||||
"Checkpoint contains outcome, progress, evidence, and remaining work.",
|
||||
failureBoundary:
|
||||
"Blocked or decide outcomes pause instead of continuing blind.",
|
||||
"Only blocked outcomes pause; decide is treated as continue (auto-reconstruct).",
|
||||
evidence: "Projection and JSONL history are written.",
|
||||
nonGoals: "Does not replace the normal task completion tool.",
|
||||
invariants: "Each checkpoint is tied to one unit id.",
|
||||
|
|
@ -129,7 +129,8 @@ describe("autonomous solver", () => {
|
|||
expect(prompt).toContain("Purpose:");
|
||||
expect(prompt).toContain("Consumer:");
|
||||
expect(prompt).toContain("Failure boundary:");
|
||||
expect(prompt).toContain('outcome: "decide"');
|
||||
expect(prompt).not.toContain('outcome: "decide"');
|
||||
expect(prompt).toContain("reconstruct best-effort");
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverMissingCheckpointRepairPrompt_rejects_file_substitutes", () => {
|
||||
|
|
@ -145,7 +146,7 @@ describe("autonomous solver", () => {
|
|||
expect(prompt).toContain("final action");
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverMissingCheckpointRepairPrompt_escalates_to_confidence_gated_decide", () => {
|
||||
test("buildAutonomousSolverMissingCheckpointRepairPrompt_escalates_to_autonomous_reconstruct", () => {
|
||||
const prompt = buildAutonomousSolverMissingCheckpointRepairPrompt(
|
||||
{ iteration: 2 },
|
||||
"research-slice",
|
||||
|
|
@ -158,8 +159,8 @@ describe("autonomous solver", () => {
|
|||
expect(prompt).toContain("Repair attempt: 3 of 4");
|
||||
expect(prompt).toContain("confidence");
|
||||
expect(prompt).toContain("0.98");
|
||||
expect(prompt).toContain('outcome="decide"');
|
||||
expect(prompt).toContain("decisionQuestion");
|
||||
expect(prompt).not.toContain('outcome="decide"');
|
||||
expect(prompt).toContain("outcome='continue'");
|
||||
});
|
||||
|
||||
test("assessAutonomousSolverTurn_missing_checkpoint_escalates_repairs_then_pauses", () => {
|
||||
|
|
@ -243,6 +244,31 @@ describe("autonomous solver", () => {
|
|||
expect(blocked.reason).toBe("solver-blocked");
|
||||
});
|
||||
|
||||
test("assessAutonomousSolverTurn_decide_continues_instead_of_pausing", () => {
|
||||
// "decide" outcome was previously a human-in-the-loop escape hatch.
|
||||
// Policy change: treat "decide" as "continue" — auto-reconstruct best-effort.
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
outcome: "decide",
|
||||
summary: "Low confidence — reconstructed best-effort.",
|
||||
completedItems: ["Analysis done"],
|
||||
remainingItems: [],
|
||||
verificationEvidence: ["artifacts match expectations"],
|
||||
pdd: pdd(),
|
||||
});
|
||||
const result = assessAutonomousSolverTurn(
|
||||
project,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
// Must not pause — the loop should continue autonomously
|
||||
expect(result.action).not.toBe("pause");
|
||||
expect(result.action).toBe("continue");
|
||||
});
|
||||
|
||||
test("assessAutonomousSolverTurn_max_iterations_pauses_before_unbounded_retry", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01", {
|
||||
|
|
@ -298,7 +324,8 @@ describe("autonomous solver", () => {
|
|||
expect(prompt).toContain("No transcript was captured");
|
||||
expect(prompt).toContain(".sf/runtime/autonomous-solver/LOOP.md");
|
||||
expect(prompt).toContain("SUMMARY.md");
|
||||
expect(prompt).toContain("outcome='decide'");
|
||||
expect(prompt).not.toContain("outcome='decide'");
|
||||
expect(prompt).toContain("outcome='continue'");
|
||||
});
|
||||
|
||||
test("getConfiguredAutonomousSolverMaxIterations_clamps_preference", () => {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue