singularity-forge/src/resources/extensions/sf/tests/auto-runaway-guard.test.mjs
Mikael Hugo 05953e9599 fix(lint): restore 0 Biome diagnostics and fix web-mode-onboarding test timeout
- Remove/prefix unused imports and variables across 11 src/ files to clear
  74 diagnostics introduced by 37 subsequent commits since run #3
- Fix pre-existing timeout in web-mode-onboarding integration test:
  - Add timeoutMs: 120_000 to launchPackagedWebHost call (was unbounded)
  - Raise AbortSignal.timeout on simple fetches 10s → 30s (under parallel load)
  - Raise overall test timeout 180s → 420s (budget: 120+60+30+30+120+30=390s)
- Log autoresearch run #4 and update lessons in autoresearch.md

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-10 11:01:43 +02:00

234 lines
6.4 KiB
JavaScript

/**
* Tests for auto-runaway-guard.ts — progress-check behavior.
*
* Purpose: verify the runaway guard does not hard-pause units that are
* making file-change progress, even when token growth would otherwise
* trigger a hard pause.
*
* Consumer: autonomous loop — evaluateRunawayGuard() controls whether a
* unit is warned, hard-paused, or allowed to continue.
*/
import assert from "node:assert/strict";
import { test } from "vitest";
import {
evaluateRunawayGuard,
resetRunawayGuardState,
} from "../auto-runaway-guard.js";
/**
 * Build a runaway-guard config object for tests.
 *
 * @param {object} [overrides] - Fields that replace or extend the defaults.
 * @returns {object} A fresh config object; later (override) keys win.
 */
function makeConfig(overrides = {}) {
  const twentyMinutesMs = 20 * 60 * 1000;
  const defaults = {
    enabled: true,
    toolCallWarning: 60,
    tokenWarning: 1_000_000,
    elapsedMs: twentyMinutesMs,
    changedFilesWarning: 75,
    diagnosticTurns: 2,
    hardPause: true,
    minIntervalMs: 120_000,
  };
  return { ...defaults, ...overrides };
}
/**
 * Build a metrics snapshot for tests.
 *
 * Defaults leave `changedFiles` and `worktreeChangedSinceStart` undefined and
 * `worktreeFingerprint` null; individual tests override what they need.
 *
 * @param {object} [overrides] - Fields that replace or extend the defaults.
 * @returns {object} A fresh metrics object (with its own `topTools` object).
 */
function makeMetrics(overrides = {}) {
  const baseMetrics = {
    toolCalls: 67,
    sessionTokens: 2_940_000,
    elapsedMs: 20 * 60 * 1000,
    changedFiles: undefined,
    worktreeFingerprint: null,
    worktreeChangedSinceStart: undefined,
    topTools: {},
  };
  return { ...baseMetrics, ...overrides };
}
/**
 * Build the baseline passed to resetRunawayGuardState() in these tests:
 * zero tokens, zero changed files, and no worktree fingerprint.
 *
 * @returns {object} A fresh baseline object on every call.
 */
function makeBaseline() {
  const baseline = {
    sessionTokens: 0,
    changedFiles: 0,
    worktreeFingerprint: null,
  };
  return baseline;
}
// ── Progress-check tests ───────────────────────────────────────────────────────
test("progress check returns none regardless of hard-pause conditions", () => {
  // Scenario: the second evaluation reports changedFiles > 0 while the other
  // inputs (token growth, elapsed time, hardPause enabled) are the ones that
  // would otherwise lead to a hard pause. The expected action is "none".
  const unitType = "discuss-milestone";
  const unitId = "M001";

  // Fresh metrics object per evaluation; only the varying fields are parameters.
  const snapshot = (sessionTokens, elapsedMinutes, changedFiles) => ({
    toolCalls: 67,
    sessionTokens,
    elapsedMs: elapsedMinutes * 60 * 1000,
    changedFiles,
    worktreeFingerprint: null,
    worktreeChangedSinceStart: false,
    topTools: {},
  });

  resetRunawayGuardState(unitType, unitId, makeBaseline());
  const guardConfig = makeConfig({ hardPause: true });
  const startedAt = Date.now();

  // Evaluation 1: no file changes yet. Per the original author's note this is
  // the call after which the guard's final warning is considered sent.
  evaluateRunawayGuard(
    unitType,
    unitId,
    snapshot(1_500_000, 22, 0),
    guardConfig,
    startedAt,
  );

  // Evaluation 2: three minutes later, tokens grew AND one file changed.
  const result = evaluateRunawayGuard(
    unitType,
    unitId,
    snapshot(2_000_000, 25, 1),
    guardConfig,
    startedAt + 180_000,
  );

  // File-change progress must win over the hard-pause path.
  assert.equal(
    result.action,
    "none",
    "progress check should return none even when hardPause conditions are met",
  );
});
test("returns none when changedFiles > 0 despite token growth and 2 warnings", () => {
  // A unit that reports a changed file is making progress and must not be
  // paused, even though its token count keeps growing between evaluations.
  const unitType = "discuss-milestone";
  const unitId = "M001";

  resetRunawayGuardState(unitType, unitId, makeBaseline());
  const guardConfig = makeConfig();
  const startedAt = Date.now();

  // Evaluation 1: first diagnostic turn, changedFiles left at its default.
  const firstTurn = makeMetrics({ sessionTokens: 1_500_000 });
  evaluateRunawayGuard(unitType, unitId, firstTurn, guardConfig, startedAt);

  // Evaluation 2: three minutes later, more tokens but one changed file.
  const secondTurn = makeMetrics({ sessionTokens: 2_940_000, changedFiles: 1 });
  const result = evaluateRunawayGuard(
    unitType,
    unitId,
    secondTurn,
    guardConfig,
    startedAt + 180_000,
  );

  assert.equal(result.action, "none", "should not pause when changedFiles > 0");
});
test("returns none when worktreeChangedSinceStart === true despite token growth", () => {
  // Progress can also show up as modified content in already-dirty files:
  // changedFiles stays 0 but worktreeChangedSinceStart is true.
  const unitType = "execute-task";
  const unitId = "T01";

  resetRunawayGuardState(unitType, unitId, makeBaseline());
  const guardConfig = makeConfig();
  const startedAt = Date.now();

  // Evaluation 1: first diagnostic turn with no changed files.
  const firstTurn = makeMetrics({ sessionTokens: 1_500_000, changedFiles: 0 });
  evaluateRunawayGuard(unitType, unitId, firstTurn, guardConfig, startedAt);

  // Evaluation 2: tokens grew, still no new files, but the worktree changed.
  const secondTurn = makeMetrics({
    sessionTokens: 2_940_000,
    changedFiles: 0,
    worktreeChangedSinceStart: true,
  });
  const result = evaluateRunawayGuard(
    unitType,
    unitId,
    secondTurn,
    guardConfig,
    startedAt + 180_000,
  );

  assert.equal(
    result.action,
    "none",
    "should not pause when worktreeChangedSinceStart === true",
  );
});
test("returns a valid action when changedFiles is 0 and worktreeChangedSinceStart is false", () => {
  // No progress signal at all: changedFiles is 0 and the worktree is unchanged.
  // This case is meant to fall through to the guard's regular warn/pause logic
  // rather than being treated as progress.
  //
  // The previous title claimed the guard "returns none" here, which contradicts
  // both the intent above and the assertion below (pause/warn/none are all
  // accepted). The title now states what is actually verified.
  resetRunawayGuardState("discuss-milestone", "M001", makeBaseline());
  const config = makeConfig();
  const now = Date.now();

  // Evaluation 1: first diagnostic turn with no changed files.
  evaluateRunawayGuard(
    "discuss-milestone",
    "M001",
    makeMetrics({ sessionTokens: 1_500_000, changedFiles: 0 }),
    config,
    now,
  );

  // Evaluation 2: three minutes later, tokens grew and still no progress.
  const r = evaluateRunawayGuard(
    "discuss-milestone",
    "M001",
    makeMetrics({
      sessionTokens: 2_940_000,
      changedFiles: 0,
      worktreeChangedSinceStart: false,
    }),
    config,
    now + 180_000,
  );

  // NOTE(review): this only checks that the action is one of the known values;
  // which one fires depends on the guard's growth heuristics. Tighten this to
  // the exact expected action once that behavior is confirmed against
  // auto-runaway-guard.
  const knownActions = ["pause", "warn", "none"];
  assert.ok(
    knownActions.includes(r.action),
    `expected pause/warn/none, got ${r.action}`,
  );
});
test("discuss-milestone with file changes does not get hard-paused", () => {
  // Regression coverage for the scenario recorded in SELF-FEEDBACK.md: a
  // discuss-milestone unit at 2.94M tokens and 67 tool calls was hard-paused
  // even though it had 1 new changed file and modified dirty file content.
  const unitType = "discuss-milestone";
  const unitId = "M001-6377a4";

  resetRunawayGuardState(unitType, unitId, makeBaseline());
  const guardConfig = makeConfig();
  const startedAt = Date.now();

  // Evaluation 1: first diagnostic turn, token growth begins.
  const firstTurn = makeMetrics({ sessionTokens: 1_500_000 });
  evaluateRunawayGuard(unitType, unitId, firstTurn, guardConfig, startedAt);

  // Evaluation 2: the reported scenario, spelled out explicitly —
  // 2.94M tokens, 67 tool calls, 20 minutes elapsed, 1 changed file.
  const reportedScenario = makeMetrics({
    sessionTokens: 2_940_000,
    toolCalls: 67,
    elapsedMs: 20 * 60 * 1000,
    changedFiles: 1,
    worktreeChangedSinceStart: true, // dirty content changed
  });
  const result = evaluateRunawayGuard(
    unitType,
    unitId,
    reportedScenario,
    guardConfig,
    startedAt + 180_000,
  );

  assert.equal(
    result.action,
    "none",
    "discuss-milestone with file changes should not be paused",
  );
});