// singularity-forge/src/resources/extensions/sf/tests/auto-runaway-guard.test.mjs

/**
 * Tests for auto-runaway-guard.ts — progress-check behavior.
 *
 * Purpose: verify the runaway guard does not hard-pause units that are
 * making file-change progress, even when token growth would otherwise
 * trigger a hard pause.
 *
 * Consumer: autonomous loop — evaluateRunawayGuard() controls whether a
 * unit is warned, hard-paused, or allowed to continue.
 */
import assert from "node:assert/strict";
import { test } from "vitest";
import {
	evaluateRunawayGuard,
	resetRunawayGuardState,
} from "../auto-runaway-guard.js";

/**
 * Build a runaway-guard config object for use in these tests.
 *
 * Starts from a fixed set of default fields and layers `overrides` on
 * top, so any key present in `overrides` wins over the default.
 *
 * @param {object} [overrides] - Fields to replace or add on top of the defaults.
 * @returns {object} A freshly created config object.
 */
function makeConfig(overrides = {}) {
	const twentyMinutesMs = 20 * 60 * 1000;
	const defaults = {
		enabled: true,
		toolCallWarning: 60,
		tokenWarning: 1_000_000,
		elapsedMs: twentyMinutesMs,
		changedFilesWarning: 75,
		diagnosticTurns: 2,
		hardPause: true,
		minIntervalMs: 120_000,
	};
	// Overrides are spread last so they take precedence over the defaults.
	return { ...defaults, ...overrides };
}

/**
 * Build a metrics snapshot to feed into the guard under test.
 *
 * The defaults describe a unit with 67 tool calls, 2.94M session tokens
 * and 20 minutes elapsed, with the file-change fields left unset.
 * Any key present in `overrides` replaces the corresponding default.
 *
 * @param {object} [overrides] - Fields to replace or add on top of the defaults.
 * @returns {object} A freshly created metrics object (with its own `topTools`).
 */
function makeMetrics(overrides = {}) {
	const twentyMinutesMs = 20 * 60 * 1000;
	// Built inside the function so every call gets its own `topTools` object.
	const defaults = {
		toolCalls: 67,
		sessionTokens: 2_940_000,
		elapsedMs: twentyMinutesMs,
		changedFiles: undefined,
		worktreeFingerprint: null,
		worktreeChangedSinceStart: undefined,
		topTools: {},
	};
	return { ...defaults, ...overrides };
}

/**
 * Build the baseline object passed to `resetRunawayGuardState` in these
 * tests: zero tokens, zero changed files, and no worktree fingerprint.
 *
 * @returns {object} A freshly created baseline object.
 */
function makeBaseline() {
	const baseline = {
		sessionTokens: 0,
		changedFiles: 0,
		worktreeFingerprint: null,
	};
	return baseline;
}

// ── Progress-check tests ───────────────────────────────────────────────────────

test("progress check returns none regardless of hard-pause conditions", () => {
	// Scenario: evaluate the same unit twice, three minutes apart, with
	// hardPause enabled. The second snapshot reports one changed file, and the
	// guard is expected to answer 'none' rather than pausing the unit.
	const unitType = "discuss-milestone";
	const unitId = "M001";
	resetRunawayGuardState(unitType, unitId, makeBaseline());
	const guardConfig = makeConfig({ hardPause: true });
	const startedAt = Date.now();

	// First evaluation: no file changes yet. Per the test's intent this primes
	// the guard's warning state (guard internals are not visible from here).
	const stalledSnapshot = {
		toolCalls: 67,
		sessionTokens: 1_500_000,
		elapsedMs: 22 * 60 * 1000,
		changedFiles: 0,
		worktreeFingerprint: null,
		worktreeChangedSinceStart: false,
		topTools: {},
	};
	evaluateRunawayGuard(unitType, unitId, stalledSnapshot, guardConfig, startedAt);

	// Second evaluation: tokens have grown, but changedFiles is now 1 — that
	// file-change progress is what should keep the guard from pausing.
	const progressSnapshot = {
		toolCalls: 67,
		sessionTokens: 2_000_000,
		elapsedMs: 25 * 60 * 1000,
		changedFiles: 1,
		worktreeFingerprint: null,
		worktreeChangedSinceStart: false,
		topTools: {},
	};
	const outcome = evaluateRunawayGuard(
		unitType,
		unitId,
		progressSnapshot,
		guardConfig,
		startedAt + 180_000,
	);

	const failureMessage =
		"progress check should return none even when hardPause conditions are met";
	assert.equal(outcome.action, "none", failureMessage);
});

test("returns none when changedFiles > 0 despite token growth and 2 warnings", () => {
	// A unit that is changing files counts as making progress, so the guard
	// is expected not to pause it even though its token usage keeps growing.
	const unit = { type: "discuss-milestone", id: "M001" };
	resetRunawayGuardState(unit.type, unit.id, makeBaseline());
	const guardConfig = makeConfig();
	const t0 = Date.now();

	// Initial evaluation at 1.5M tokens (changedFiles left at its default).
	const initialMetrics = makeMetrics({ sessionTokens: 1_500_000 });
	evaluateRunawayGuard(unit.type, unit.id, initialMetrics, guardConfig, t0);

	// Three minutes later: tokens are at 2.94M, and one file has changed.
	const laterMetrics = makeMetrics({ sessionTokens: 2_940_000, changedFiles: 1 });
	const { action } = evaluateRunawayGuard(
		unit.type,
		unit.id,
		laterMetrics,
		guardConfig,
		t0 + 180_000,
	);

	assert.equal(action, "none", "should not pause when changedFiles > 0");
});

test("returns none when worktreeChangedSinceStart === true despite token growth", () => {
	// Here the changed-file count stays at 0, but the worktree is flagged as
	// changed since start (already-dirty file content was modified). That is
	// still expected to count as progress.
	const unitType = "execute-task";
	const unitId = "T01";
	resetRunawayGuardState(unitType, unitId, makeBaseline());
	const guardConfig = makeConfig();
	const baseTime = Date.now();
	const threeMinutesLater = baseTime + 180_000;

	// Initial evaluation: 1.5M tokens, no changed files.
	evaluateRunawayGuard(
		unitType,
		unitId,
		makeMetrics({ sessionTokens: 1_500_000, changedFiles: 0 }),
		guardConfig,
		baseTime,
	);

	// Follow-up evaluation: tokens grew, still no new files, worktree changed.
	const followUpMetrics = makeMetrics({
		sessionTokens: 2_940_000,
		changedFiles: 0,
		worktreeChangedSinceStart: true,
	});
	const verdict = evaluateRunawayGuard(
		unitType,
		unitId,
		followUpMetrics,
		guardConfig,
		threeMinutesLater,
	);

	assert.equal(
		verdict.action,
		"none",
		"should not pause when worktreeChangedSinceStart === true",
	);
});

test("yields a valid action when changedFiles is 0 and worktreeChangedSinceStart is false (no progress)", () => {
	// No-progress scenario: changedFiles is 0 and the worktree is reported as
	// unchanged, so the progress short-circuit should NOT apply and evaluation
	// should fall through to the guard's regular warn/hard-pause logic.
	//
	// The previous title said "returns none", which contradicted both this
	// intent and the assertion below; the title now states what is checked.
	resetRunawayGuardState("discuss-milestone", "M001", makeBaseline());
	const config = makeConfig();
	const now = Date.now();

	// Initial evaluation: 1.5M tokens, no changed files.
	evaluateRunawayGuard(
		"discuss-milestone",
		"M001",
		makeMetrics({ sessionTokens: 1_500_000, changedFiles: 0 }),
		config,
		now,
	);

	// Follow-up three minutes later: tokens grew, still no sign of progress.
	const r = evaluateRunawayGuard(
		"discuss-milestone",
		"M001",
		makeMetrics({
			sessionTokens: 2_940_000,
			changedFiles: 0,
			worktreeChangedSinceStart: false,
		}),
		config,
		now + 180_000,
	);

	// NOTE(review): this assertion is deliberately permissive — which action
	// fires depends on the guard's growth heuristics, which this test does not
	// pin down. It only verifies the call completes with one of the listed
	// actions. Tighten to the exact expected action once that is confirmed
	// against the guard implementation.
	const acceptedActions = ["pause", "warn", "none"];
	assert.ok(
		acceptedActions.includes(r.action),
		`expected pause/warn/none, got ${r.action}`,
	);
});

test("discuss-milestone with file changes does not get hard-paused", () => {
	// Regression scenario taken from SELF-FEEDBACK.md: a discuss-milestone unit
	// at 2.94M tokens and 67 tool calls was hard-paused even though it had one
	// new changed file and had modified dirty file content.
	const unitType = "discuss-milestone";
	const unitId = "M001-6377a4";
	resetRunawayGuardState(unitType, unitId, makeBaseline());
	const guardConfig = makeConfig();
	const firstCheckAt = Date.now();
	const secondCheckAt = firstCheckAt + 180_000;

	// Initial evaluation at 1.5M tokens, before the reported numbers are reached.
	const earlyMetrics = makeMetrics({ sessionTokens: 1_500_000 });
	evaluateRunawayGuard(unitType, unitId, earlyMetrics, guardConfig, firstCheckAt);

	// Reported numbers: 2.94M tokens, 67 tool calls, 20 minutes elapsed,
	// one changed file, and dirty content changed in the worktree.
	const reportedMetrics = makeMetrics({
		sessionTokens: 2_940_000,
		toolCalls: 67,
		elapsedMs: 20 * 60 * 1000,
		changedFiles: 1,
		worktreeChangedSinceStart: true,
	});
	const { action } = evaluateRunawayGuard(
		unitType,
		unitId,
		reportedMetrics,
		guardConfig,
		secondCheckAt,
	);

	assert.equal(
		action,
		"none",
		"discuss-milestone with file changes should not be paused",
	);
});