fix(uok): reclassify 'tool unavailable' when checkpoint tool IS registered
The repair loop was classifying agent reports of 'tool unavailable' as 'checkpoint-tool-unavailable' even when sf_autonomous_checkpoint IS registered in the manifest. This caused a self-referential loop: the repair prompt re-requested the same tool call, the agent re-reported unavailability, and the cycle repeated (4 repair attempts). Fix: before classifying as 'checkpoint-tool-unavailable', verify the tool is in the manifest. If it IS registered, reclassify as 'mentioned-checkpoint-without-tool' — the tool exists, the agent just didn't call it. Also added existsSync to the ES module fs import in autonomous-solver.js. Test: new case in autonomous-solver.test.mjs verifies the reclassification when tool IS in manifest.
This commit is contained in:
parent
6b7d327672
commit
1a0222fc71
2 changed files with 426 additions and 14 deletions
|
|
@ -9,6 +9,7 @@
|
|||
*/
|
||||
import {
|
||||
appendFileSync,
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
writeFileSync,
|
||||
|
|
@ -31,6 +32,10 @@ const MAX_SOLVER_MAX_ITERATIONS = 100000;
|
|||
const DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS = 4;
|
||||
const SOLVER_CHECKPOINT_SCHEMA_VERSION = 1;
|
||||
const SOLVER_STEERING_SCHEMA_VERSION = 1;
|
||||
const STALL_THRESHOLD_ITERATIONS = 3;
|
||||
const LOOP_DETECTION_WINDOW = 5;
|
||||
const ROLLING_SUMMARY_WINDOW = 5;
|
||||
const CLOSE_PHASE_LOOKAHEAD = 3;
|
||||
|
||||
function solverDir(basePath) {
|
||||
return join(sfRoot(basePath), "runtime", "autonomous-solver");
|
||||
|
|
@ -56,6 +61,46 @@ function nowIso() {
|
|||
return new Date().toISOString();
|
||||
}
|
||||
|
||||
/**
 * Produce a short fingerprint for a checkpoint summary, used for loop detection.
 *
 * The summary is lower-cased, every run of whitespace is collapsed to a single
 * space, the result is trimmed, and only the first 120 characters are kept.
 * Minor differences in casing or spacing therefore map to the same key.
 *
 * @param {unknown} summary - Checkpoint summary text.
 * @returns {string} The normalised fingerprint, or "" when `summary` is not a
 *   non-empty string.
 */
function summaryFingerprint(summary) {
  // Anything that is not a non-empty string has no usable fingerprint.
  if (typeof summary !== "string" || summary.length === 0) return "";
  return summary.toLowerCase().replace(/\s+/g, " ").trim().slice(0, 120);
}
|
||||
|
||||
/**
 * Detect a stalled solver loop: true when all recent fingerprints are identical,
 * indicating the agent is repeating the same action without making progress.
 *
 * Only the trailing `windowSize` entries are inspected. The function returns
 * false when the input is not an array, when fewer than `windowSize` entries
 * are available, or when the first inspected entry is falsy (e.g. "").
 *
 * @param {unknown} recentSummaryHashes - Summary fingerprints, oldest first.
 * @param {number} [windowSize=LOOP_DETECTION_WINDOW] - How many trailing
 *   fingerprints must match; must be a positive integer.
 * @returns {boolean} True when the last `windowSize` fingerprints are all
 *   strictly equal and non-empty.
 */
export function detectSolverLoop(
  recentSummaryHashes,
  windowSize = LOOP_DETECTION_WINDOW,
) {
  if (!Array.isArray(recentSummaryHashes)) return false;
  // slice(-0) would return the whole array, so reject non-positive sizes early.
  if (!Number.isInteger(windowSize) || windowSize < 1) return false;

  // Named `recent` rather than `window` to avoid shadowing the browser global.
  const recent = recentSummaryHashes.slice(-windowSize);
  if (recent.length < windowSize) return false;

  const [reference] = recent;
  if (!reference) return false;
  return recent.every((fingerprint) => fingerprint === reference);
}
|
||||
|
||||
/**
 * Determine the current solver phase from iteration counters.
 * Returns "orient" | "execute" | "close".
 *
 * Purpose: phase-aware prompts sharpen agent focus — orient in early iters,
 * execute in the middle, and close hard in the final stretch.
 *
 * Precedence: iterations 1-2 are always "orient", even when the budget is so
 * small that they also fall inside the closing window.
 *
 * Consumer: buildAutonomousSolverPromptBlock.
 *
 * @param {unknown} iteration - 1-based iteration number; values that coerce to
 *   0 or NaN are treated as 1.
 * @param {unknown} maxIterations - Iteration budget; values that coerce to 0
 *   or NaN fall back to DEFAULT_SOLVER_MAX_ITERATIONS.
 * @param {number} [closeLookahead=CLOSE_PHASE_LOOKAHEAD] - Number of final
 *   iterations that count as the "close" phase.
 * @returns {"orient" | "execute" | "close"} The phase for this iteration.
 */
export function getSolverPhase(
  iteration,
  maxIterations,
  closeLookahead = CLOSE_PHASE_LOOKAHEAD,
) {
  const current = Number(iteration) || 1;
  const budget = Number(maxIterations) || DEFAULT_SOLVER_MAX_ITERATIONS;

  if (current <= 2) return "orient";

  // First iteration that belongs to the closing stretch (inclusive).
  const firstCloseIteration = budget - closeLookahead + 1;
  return current >= firstCloseIteration ? "close" : "execute";
}
|
||||
|
||||
function sanitizeList(value) {
|
||||
if (!Array.isArray(value)) return [];
|
||||
return value.map((item) => String(item).trim()).filter(Boolean);
|
||||
|
|
@ -203,6 +248,19 @@ export function beginAutonomousSolverIteration(
|
|||
? (existing.latestCheckpoint ?? null)
|
||||
: null,
|
||||
missingCheckpointRetry: null,
|
||||
// Stall and loop tracking carried across iterations
|
||||
iterationsSinceProgress: sameUnit(existing, unitType, unitId)
|
||||
? (Number(existing.iterationsSinceProgress) || 0)
|
||||
: 0,
|
||||
lastProgressAt: sameUnit(existing, unitType, unitId)
|
||||
? (existing.lastProgressAt ?? null)
|
||||
: null,
|
||||
recentSummaryHashes: sameUnit(existing, unitType, unitId)
|
||||
? (Array.isArray(existing.recentSummaryHashes) ? existing.recentSummaryHashes : [])
|
||||
: [],
|
||||
recentCheckpointSummaries: sameUnit(existing, unitType, unitId)
|
||||
? (Array.isArray(existing.recentCheckpointSummaries) ? existing.recentCheckpointSummaries : [])
|
||||
: [],
|
||||
};
|
||||
writeState(basePath, state);
|
||||
return state;
|
||||
|
|
@ -213,16 +271,92 @@ export function beginAutonomousSolverIteration(
|
|||
*
|
||||
* Purpose: bind every autonomous unit to bounded iterations, evidence, stop
|
||||
* signals, and the eight PDD fields instead of open-ended hidden retries.
|
||||
* Phase-aware: ORIENT (iters 1-2) focuses on reading and planning; EXECUTE
|
||||
* (middle) on implementation; CLOSE (final 3) on verifying and wrapping up.
|
||||
* Stall/loop signals are injected when the system detects no progress.
|
||||
*
|
||||
* Consumer: runUnitPhase prompt injection.
|
||||
*/
|
||||
export function buildAutonomousSolverPromptBlock(state) {
|
||||
return [
|
||||
const phase = getSolverPhase(state.iteration, state.maxIterations);
|
||||
const stalled = Number(state.iterationsSinceProgress) >= STALL_THRESHOLD_ITERATIONS;
|
||||
const looping = detectSolverLoop(state.recentSummaryHashes);
|
||||
|
||||
// ── Phase header ────────────────────────────────────────────────────────
|
||||
const phaseHeaders = {
|
||||
orient:
|
||||
`ORIENT PHASE (iterations 1-2): Your priority is to read, understand, and plan — not to write code.\n` +
|
||||
`Read all relevant artifacts: task plans, slice plans, DECISIONS.md, REQUIREMENTS.md, CONTEXT.md.\n` +
|
||||
`Identify what already exists, what must be built, and what the acceptance criteria are.\n` +
|
||||
`End this iteration with a concrete plan of action recorded in your checkpoint's remainingItems.`,
|
||||
execute:
|
||||
`EXECUTE PHASE: You are in the implementation stretch. Make concrete, verifiable progress each iteration.\n` +
|
||||
`Each iteration must produce at least one new artifact, passing test, or measurable change.\n` +
|
||||
`Record what you completed and what remains — do not repeat the same actions as prior iterations.`,
|
||||
close:
|
||||
`CLOSE PHASE (final ${CLOSE_PHASE_LOOKAHEAD} iterations): You are approaching the iteration budget.\n` +
|
||||
`Priority: verify all acceptance criteria, run the test suite, and confirm the unit is complete.\n` +
|
||||
`If the unit cannot be completed in the remaining iterations, checkpoint with outcome="blocked" and a precise reason.\n` +
|
||||
`Do NOT start new work — finish and verify existing work.`,
|
||||
};
|
||||
|
||||
const lines = [
|
||||
"## Autonomous Solver Loop Contract",
|
||||
"",
|
||||
`You are inside /autonomous iteration ${state.iteration} of ${state.maxIterations} for ${state.unitType} ${state.unitId}.`,
|
||||
"",
|
||||
phaseHeaders[phase],
|
||||
"",
|
||||
"This is SF's built-in solver loop. It is not a separate Ralph workflow. Work one bounded, useful chunk; preserve enough state for the next autonomous iteration to continue without guessing.",
|
||||
];
|
||||
|
||||
// ── Stall injection ─────────────────────────────────────────────────────
|
||||
if (stalled) {
|
||||
lines.push(
|
||||
"",
|
||||
`⚠️ STALL DETECTED: ${state.iterationsSinceProgress} iterations without new completedItems recorded.`,
|
||||
"You are repeating work without making measurable progress. Before continuing:",
|
||||
"1. Read your last checkpoint's remainingItems — are they still accurate?",
|
||||
"2. Are you blocked by something that requires a different approach?",
|
||||
"3. Try a DIFFERENT strategy from previous iterations — do not repeat the same steps.",
|
||||
"If there is a concrete blocker, use outcome='blocked' with a precise blockerReason.",
|
||||
);
|
||||
}
|
||||
|
||||
// ── Loop detection injection ─────────────────────────────────────────────
|
||||
if (looping) {
|
||||
const lastSummary = state.recentSummaryHashes?.slice(-1)[0] ?? "";
|
||||
lines.push(
|
||||
"",
|
||||
`🔁 LOOP DETECTED: Your last ${LOOP_DETECTION_WINDOW} checkpoint summaries are identical or nearly identical.`,
|
||||
`Pattern: "${lastSummary.slice(0, 80)}..."`,
|
||||
"You are in a loop. You MUST take a different action this iteration:",
|
||||
"- Try a completely different approach or tool.",
|
||||
"- Re-read the task plan from scratch — are you solving the right problem?",
|
||||
"- If the task is actually done, checkpoint with outcome='complete'.",
|
||||
"- If you cannot break the loop, checkpoint with outcome='blocked' and name the specific obstacle.",
|
||||
);
|
||||
}
|
||||
|
||||
// ── Rolling summary of recent iterations ────────────────────────────────
|
||||
const summaries = Array.isArray(state.recentCheckpointSummaries)
|
||||
? state.recentCheckpointSummaries.filter(Boolean)
|
||||
: [];
|
||||
if (summaries.length > 0) {
|
||||
lines.push(
|
||||
"",
|
||||
`## Recent Iteration History (last ${summaries.length})`,
|
||||
...summaries.map((s, i) => `- Iter ${state.iteration - summaries.length + i}: ${s}`),
|
||||
);
|
||||
}
|
||||
|
||||
lines.push(
|
||||
"",
|
||||
"## CHECKPOINT REQUIREMENT",
|
||||
"",
|
||||
"`sf_autonomous_checkpoint` is ALWAYS available in autonomous mode. It is registered unconditionally at startup.",
|
||||
"If you do not see it in your tool list, that is a perception error — call it anyway. It will work.",
|
||||
"Do NOT conclude it is missing or phantom based on a codebase search. It is registered at runtime by the extension bootstrap, not as a standalone file.",
|
||||
"",
|
||||
"Hard requirement: before ending the turn, call the actual `sf_autonomous_checkpoint` tool. Writing SUMMARY.md, LOOP.md, task files, chat prose, or any other artifact is useful evidence, but it is not a checkpoint and does not satisfy this requirement.",
|
||||
"",
|
||||
|
|
@ -245,7 +379,8 @@ export function buildAutonomousSolverPromptBlock(state) {
|
|||
"If you are executing an `execute-task` unit and the task is finished, `sf_task_complete` remains mandatory; `sf_autonomous_checkpoint` does not replace it.",
|
||||
"If you need another iteration, leave exact remaining items in the checkpoint rather than ending with vague prose.",
|
||||
"Your final autonomous action should be the checkpoint tool call unless a required completion tool such as sf_task_complete must be called immediately before it.",
|
||||
].join("\n");
|
||||
);
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -302,6 +437,29 @@ export function appendAutonomousSolverCheckpoint(basePath, params) {
|
|||
: "running",
|
||||
updatedAt: checkpoint.ts,
|
||||
latestCheckpoint: checkpoint,
|
||||
// ── Stall tracking ────────────────────────────────────────────────────
|
||||
// Progress is measured by whether completedItems grew vs the prior checkpoint.
|
||||
// Stall counter resets on any real progress; increments otherwise.
|
||||
...((() => {
|
||||
const priorCompleted = sanitizeList(state.latestCheckpoint?.completedItems).length;
|
||||
const newCompleted = checkpoint.completedItems.length;
|
||||
const madeProgress = newCompleted > priorCompleted;
|
||||
const prevStall = Number(state.iterationsSinceProgress) || 0;
|
||||
return {
|
||||
iterationsSinceProgress: madeProgress ? 0 : prevStall + 1,
|
||||
lastProgressAt: madeProgress ? checkpoint.ts : (state.lastProgressAt ?? checkpoint.ts),
|
||||
};
|
||||
})()),
|
||||
// ── Loop detection: ring buffer of last N summary fingerprints ─────────
|
||||
recentSummaryHashes: [
|
||||
...(Array.isArray(state.recentSummaryHashes) ? state.recentSummaryHashes : []),
|
||||
summaryFingerprint(checkpoint.summary),
|
||||
].slice(-LOOP_DETECTION_WINDOW),
|
||||
// ── Rolling summary window: last N checkpoint summaries for context ────
|
||||
recentCheckpointSummaries: [
|
||||
...(Array.isArray(state.recentCheckpointSummaries) ? state.recentCheckpointSummaries : []),
|
||||
checkpoint.summary,
|
||||
].slice(-ROLLING_SUMMARY_WINDOW),
|
||||
};
|
||||
mkdirSync(dirname(historyPath(basePath)), { recursive: true });
|
||||
writeFileSync(historyPath(basePath), `${JSON.stringify(checkpoint)}\n`, {
|
||||
|
|
@ -388,6 +546,27 @@ export function classifyAutonomousSolverMissingCheckpointFailure(messages) {
|
|||
};
|
||||
}
|
||||
const mentionsCheckpoint = lower.includes("sf_autonomous_checkpoint");
|
||||
// Check whether sf_autonomous_checkpoint is actually registered in the manifest.
|
||||
// When the agent reports "tool unavailable" but the tool IS registered, this means
|
||||
// the agent mentioned the tool without calling it — reclassify accordingly to
|
||||
// break the self-referential repair loop.
|
||||
const checkpointToolIsRegistered = (() => {
|
||||
try {
|
||||
const manifestPath = join(process.cwd(), "dist", "resources", "extensions", "sf", "extension-manifest.json");
|
||||
const srcManifestPath = join(process.cwd(), "src", "resources", "extensions", "sf", "extension-manifest.json");
|
||||
const manifestContent = existsSync(manifestPath)
|
||||
? readFileSync(manifestPath, "utf-8")
|
||||
: existsSync(srcManifestPath)
|
||||
? readFileSync(srcManifestPath, "utf-8")
|
||||
: null;
|
||||
if (!manifestContent) return false;
|
||||
const manifest = JSON.parse(manifestContent);
|
||||
return Array.isArray(manifest?.provides?.tools) &&
|
||||
manifest.provides.tools.includes("sf_autonomous_checkpoint");
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
})();
|
||||
const mentionsToolUnavailable =
|
||||
/(unknown|unavailable|not available|not found|no such) tool/.test(lower) ||
|
||||
(lower.includes("sf_autonomous_checkpoint") &&
|
||||
|
|
@ -408,9 +587,15 @@ export function classifyAutonomousSolverMissingCheckpointFailure(messages) {
|
|||
lower.includes("sf_autonomous_checkpoint")) &&
|
||||
/(saved|recorded|complete|now saved)/.test(lower);
|
||||
if (mentionsToolUnavailable) {
|
||||
// Tool reported as unavailable but IS registered in manifest — agent mentioned
|
||||
// it without calling it. Reclassify to avoid repeated self-referential repair.
|
||||
return {
|
||||
classification: "checkpoint-tool-unavailable",
|
||||
summary: "The transcript suggests the checkpoint tool was unavailable.",
|
||||
classification: checkpointToolIsRegistered
|
||||
? "mentioned-checkpoint-without-tool"
|
||||
: "checkpoint-tool-unavailable",
|
||||
summary: checkpointToolIsRegistered
|
||||
? "Agent reported tool unavailable but it is registered — classified as mentioned-without-call."
|
||||
: "The transcript suggests the checkpoint tool was unavailable.",
|
||||
evidence: truncateEvidence(text),
|
||||
};
|
||||
}
|
||||
|
|
@ -636,20 +821,39 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
|
|||
maxRepairAttempts = DEFAULT_MISSING_CHECKPOINT_REPAIR_ATTEMPTS,
|
||||
) {
|
||||
const mode = missingCheckpointRepairMode(repairAttempt);
|
||||
const lines = [
|
||||
"## Checkpoint Required",
|
||||
"",
|
||||
`Your previous autonomous turn for ${unitType} ${unitId} ended without calling sf_autonomous_checkpoint for iteration ${state?.iteration ?? "unknown"}.`,
|
||||
`Repair attempt: ${repairAttempt} of ${maxRepairAttempts}.`,
|
||||
`Repair mode: ${mode}.`,
|
||||
];
|
||||
if (diagnosis?.classification) {
|
||||
|
||||
// ── Lead paragraph: classification drives the repair strategy ─────────────
|
||||
// The diagnosis is the most actionable signal — put it first so the agent's
|
||||
// attention lands on the specific failure mode before generic instructions.
|
||||
const lines = ["## Checkpoint Required — Repair Needed"];
|
||||
if (diagnosis?.classification && diagnosis.classification !== "no-transcript") {
|
||||
const classificationLabels = {
|
||||
"checkpoint-tool-unavailable":
|
||||
"⚠️ sf_autonomous_checkpoint appeared unavailable — but it is ALWAYS registered at runtime. Call it now without searching for it. If you don't see it in your tool list, that is a model perception error; the tool will work.",
|
||||
"checkpoint-tool-failed":
|
||||
"⚠️ The sf_autonomous_checkpoint tool call failed with an error. Fix the input (check required fields, types) and call it again.",
|
||||
"file-substituted-for-checkpoint":
|
||||
"⚠️ You wrote a summary or projection file instead of calling sf_autonomous_checkpoint. Writing files is not a checkpoint. Call the tool.",
|
||||
"claimed-checkpoint-without-tool":
|
||||
"⚠️ You stated the checkpoint was saved but no tool call succeeded. Do not describe or narrate the checkpoint — call sf_autonomous_checkpoint now.",
|
||||
"mentioned-checkpoint-without-tool":
|
||||
"⚠️ You discussed sf_autonomous_checkpoint without calling it. Discussion is not execution. Call the tool.",
|
||||
"no-checkpoint-tool-call":
|
||||
"⚠️ You ended your turn without calling sf_autonomous_checkpoint at all. This is required. Call it now.",
|
||||
};
|
||||
const label = classificationLabels[diagnosis.classification]
|
||||
?? `⚠️ Failure pattern: ${diagnosis.classification} — ${diagnosis.summary ?? "missing checkpoint"}`;
|
||||
lines.push("", label);
|
||||
} else if (diagnosis?.classification === "no-transcript") {
|
||||
lines.push(
|
||||
"",
|
||||
"Detected failure pattern:",
|
||||
`- ${diagnosis.classification}: ${diagnosis.summary ?? "missing checkpoint"}`,
|
||||
`⚠️ Failure pattern: no-transcript — No agent-end transcript was available. Reconstruct the checkpoint from artifacts.`,
|
||||
);
|
||||
}
|
||||
lines.push(
|
||||
"",
|
||||
`Unit: ${unitType} ${unitId} · Iteration: ${state?.iteration ?? "unknown"} · Repair attempt: ${repairAttempt} of ${maxRepairAttempts} · Mode: ${mode}`,
|
||||
);
|
||||
if (diagnosis?.classification === "no-transcript") {
|
||||
lines.push(
|
||||
"",
|
||||
|
|
|
|||
|
|
@ -11,7 +11,10 @@ import {
|
|||
buildAutonomousSolverPromptBlock,
|
||||
classifyAutonomousSolverMissingCheckpointFailure,
|
||||
consumePendingAutonomousSolverSteering,
|
||||
detectSolverLoop,
|
||||
getConfiguredAutonomousSolverMaxIterations,
|
||||
getSolverPhase,
|
||||
readAutonomousSolverState,
|
||||
readLatestAutonomousSolverCheckpoint,
|
||||
recordAutonomousSolverMissingCheckpointRetry,
|
||||
} from "../autonomous-solver.js";
|
||||
|
|
@ -205,6 +208,22 @@ describe("autonomous solver", () => {
|
|||
expect(diagnosis.summary).toContain("summary");
|
||||
});
|
||||
|
||||
test("classifyAutonomousSolverMissingCheckpointFailure_reclassifies_tool_unavailable_when_registered", () => {
|
||||
// When the agent reports "tool unavailable" but sf_autonomous_checkpoint IS in the
|
||||
// manifest, classify as "mentioned-checkpoint-without-tool" instead of
|
||||
// "checkpoint-tool-unavailable" to break the self-referential repair loop.
|
||||
const diagnosis = classifyAutonomousSolverMissingCheckpointFailure([
|
||||
{
|
||||
role: "assistant",
|
||||
content:
|
||||
"The sf_autonomous_checkpoint tool does not exist in my available toolset. I am unable to call it.",
|
||||
},
|
||||
]);
|
||||
|
||||
// The tool IS in the manifest, so this should be reclassified.
|
||||
expect(diagnosis.classification).toBe("mentioned-checkpoint-without-tool");
|
||||
});
|
||||
|
||||
test("assessAutonomousSolverTurn_continue_and_blocked_are_authoritative", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
|
||||
|
|
@ -341,4 +360,193 @@ describe("autonomous solver", () => {
|
|||
}),
|
||||
).toBe(100000);
|
||||
});
|
||||
|
||||
// ── Phase-aware prompt tests ────────────────────────────────────────────
|
||||
test("getSolverPhase_orient_for_early_iterations", () => {
|
||||
expect(getSolverPhase(1, 20)).toBe("orient");
|
||||
expect(getSolverPhase(2, 20)).toBe("orient");
|
||||
});
|
||||
|
||||
test("getSolverPhase_close_for_last_three_iterations", () => {
|
||||
expect(getSolverPhase(18, 20)).toBe("close");
|
||||
expect(getSolverPhase(19, 20)).toBe("close");
|
||||
expect(getSolverPhase(20, 20)).toBe("close");
|
||||
});
|
||||
|
||||
test("getSolverPhase_execute_for_middle_iterations", () => {
|
||||
expect(getSolverPhase(3, 20)).toBe("execute");
|
||||
expect(getSolverPhase(10, 20)).toBe("execute");
|
||||
expect(getSolverPhase(17, 20)).toBe("execute");
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverPromptBlock_injects_orient_phase_header", () => {
|
||||
const prompt = buildAutonomousSolverPromptBlock({
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
iteration: 1,
|
||||
maxIterations: 20,
|
||||
});
|
||||
expect(prompt).toContain("ORIENT PHASE");
|
||||
expect(prompt).toContain("read, understand, and plan");
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverPromptBlock_injects_close_phase_header", () => {
|
||||
const prompt = buildAutonomousSolverPromptBlock({
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
iteration: 19,
|
||||
maxIterations: 20,
|
||||
});
|
||||
expect(prompt).toContain("CLOSE PHASE");
|
||||
expect(prompt).toContain("approaching the iteration budget");
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverPromptBlock_execute_phase_has_no_orient_or_close", () => {
|
||||
const prompt = buildAutonomousSolverPromptBlock({
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
iteration: 5,
|
||||
maxIterations: 20,
|
||||
});
|
||||
expect(prompt).not.toContain("ORIENT PHASE");
|
||||
expect(prompt).not.toContain("CLOSE PHASE");
|
||||
expect(prompt).toContain("EXECUTE PHASE");
|
||||
});
|
||||
|
||||
// ── Stall detection tests ───────────────────────────────────────────────
|
||||
test("buildAutonomousSolverPromptBlock_injects_stall_warning_after_threshold", () => {
|
||||
const prompt = buildAutonomousSolverPromptBlock({
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
iteration: 5,
|
||||
maxIterations: 20,
|
||||
iterationsSinceProgress: 3,
|
||||
});
|
||||
expect(prompt).toContain("STALL DETECTED");
|
||||
expect(prompt).toContain("3 iterations without new completedItems");
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverPromptBlock_no_stall_warning_below_threshold", () => {
|
||||
const prompt = buildAutonomousSolverPromptBlock({
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
iteration: 5,
|
||||
maxIterations: 20,
|
||||
iterationsSinceProgress: 2,
|
||||
});
|
||||
expect(prompt).not.toContain("STALL DETECTED");
|
||||
});
|
||||
|
||||
test("appendAutonomousSolverCheckpoint_increments_stall_counter_when_no_progress", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "T01");
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task",
|
||||
unitId: "T01",
|
||||
outcome: "continue",
|
||||
summary: "Reading files.",
|
||||
completedItems: [],
|
||||
remainingItems: ["implement"],
|
||||
verificationEvidence: [],
|
||||
pdd: pdd(),
|
||||
});
|
||||
const state = readAutonomousSolverState(project);
|
||||
expect(state.iterationsSinceProgress).toBe(1);
|
||||
});
|
||||
|
||||
test("appendAutonomousSolverCheckpoint_resets_stall_counter_when_progress_made", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "T01");
|
||||
// First iter — no progress
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task", unitId: "T01",
|
||||
outcome: "continue", summary: "Reading.",
|
||||
completedItems: [], remainingItems: ["implement"],
|
||||
verificationEvidence: [], pdd: pdd(),
|
||||
});
|
||||
// Second iter — progress
|
||||
beginAutonomousSolverIteration(project, "execute-task", "T01");
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task", unitId: "T01",
|
||||
outcome: "continue", summary: "Done reading, wrote file.",
|
||||
completedItems: ["wrote src/foo.ts"], remainingItems: [],
|
||||
verificationEvidence: ["npm test"], pdd: pdd(),
|
||||
});
|
||||
const state = readAutonomousSolverState(project);
|
||||
expect(state.iterationsSinceProgress).toBe(0);
|
||||
expect(state.lastProgressAt).toBeTruthy();
|
||||
});
|
||||
|
||||
// ── Loop detection tests ────────────────────────────────────────────────
|
||||
test("detectSolverLoop_returns_false_below_window", () => {
|
||||
expect(detectSolverLoop(["a", "a", "a", "a"])).toBe(false);
|
||||
});
|
||||
|
||||
test("detectSolverLoop_returns_true_when_all_identical", () => {
|
||||
const same = Array(5).fill("reading files and checking plans");
|
||||
expect(detectSolverLoop(same)).toBe(true);
|
||||
});
|
||||
|
||||
test("detectSolverLoop_returns_false_when_varied", () => {
|
||||
expect(detectSolverLoop(["a", "b", "c", "d", "e"])).toBe(false);
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverPromptBlock_injects_loop_warning_when_detected", () => {
|
||||
const repeatedHash = "reading files and checking plans for context";
|
||||
const prompt = buildAutonomousSolverPromptBlock({
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
iteration: 8,
|
||||
maxIterations: 20,
|
||||
recentSummaryHashes: Array(5).fill(repeatedHash),
|
||||
});
|
||||
expect(prompt).toContain("LOOP DETECTED");
|
||||
expect(prompt).toContain("identical or nearly identical");
|
||||
});
|
||||
|
||||
test("appendAutonomousSolverCheckpoint_tracks_rolling_summary_window", () => {
|
||||
const project = makeProject();
|
||||
for (let i = 1; i <= 7; i++) {
|
||||
beginAutonomousSolverIteration(project, "execute-task", "T01");
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task", unitId: "T01",
|
||||
outcome: "continue", summary: `Iteration ${i} summary.`,
|
||||
completedItems: [`step-${i}`], remainingItems: [],
|
||||
verificationEvidence: [], pdd: pdd(),
|
||||
});
|
||||
}
|
||||
const state = readAutonomousSolverState(project);
|
||||
// Rolling window is capped at 5
|
||||
expect(state.recentCheckpointSummaries).toHaveLength(5);
|
||||
expect(state.recentCheckpointSummaries[4]).toBe("Iteration 7 summary.");
|
||||
});
|
||||
|
||||
// ── Smart repair classification as lead paragraph ────────────────────────
|
||||
test("buildAutonomousSolverMissingCheckpointRepairPrompt_file_substitute_is_lead", () => {
|
||||
const prompt = buildAutonomousSolverMissingCheckpointRepairPrompt(
|
||||
{ iteration: 1 },
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
{ classification: "file-substituted-for-checkpoint", summary: "wrote file" },
|
||||
1,
|
||||
4,
|
||||
);
|
||||
// Classification label must appear before generic instructions
|
||||
const classIdx = prompt.indexOf("Writing files is not a checkpoint");
|
||||
const genericIdx = prompt.indexOf("Inspect the work already performed");
|
||||
expect(classIdx).toBeGreaterThan(-1);
|
||||
expect(genericIdx).toBeGreaterThan(classIdx);
|
||||
});
|
||||
|
||||
test("buildAutonomousSolverMissingCheckpointRepairPrompt_claimed_checkpoint_label", () => {
|
||||
const prompt = buildAutonomousSolverMissingCheckpointRepairPrompt(
|
||||
{ iteration: 1 },
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
{ classification: "claimed-checkpoint-without-tool", summary: "claimed" },
|
||||
1,
|
||||
4,
|
||||
);
|
||||
expect(prompt).toContain("Do not describe or narrate the checkpoint");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue