Fix Codex review issues: phase-timeout mutation race and missing backfill

P1 (phase-timeout mutation race): withPhaseTimeout now stores the still-running
phase promise in _danglingPhasePromise when a timeout fires. Each loop iteration
drains that promise (with try/catch) before starting new work, preventing the
timed-out phase from mutating state concurrently with the next iteration.

P2 (verification_status backfill): Schema migration v17 now runs a backfill UPDATE
after adding the new column, deriving verification_status from existing
verification_evidence rows. Projects upgraded mid-slice will have correct
all_pass/partial/all_fail values immediately rather than empty strings that
bypass the prior-task guard.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-18 16:48:00 +02:00
parent ca5890df2e
commit 9a04fef925
2 changed files with 53 additions and 2 deletions

View file

@ -95,9 +95,19 @@ function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: n
return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct };
}
/**
* Tracks the dangling phase promise from the most recent timeout so the next
* iteration can drain it before proceeding. Promise.race() rejects on timeout
* but does not cancel the underlying async work; draining here prevents the
* timed-out phase from mutating state concurrently with the next iteration.
*/
let _danglingPhasePromise: Promise<unknown> | null = null;
/**
* Wrap a phase function with a timeout. Rejects with an Error whose message
* starts with "phase-timeout:" so the blanket catch can handle it specially.
* Stores the still-running phase promise in _danglingPhasePromise so the caller
* can drain it before starting a new iteration.
*/
async function withPhaseTimeout<T>(
name: string,
@ -105,6 +115,7 @@ async function withPhaseTimeout<T>(
timeoutMs: number,
): Promise<T> {
let timer: ReturnType<typeof setTimeout> | undefined;
const phasePromise = fn();
const timeout = new Promise<never>((_, reject) => {
timer = setTimeout(
() => reject(new Error(`phase-timeout:${name}`)),
@ -112,8 +123,12 @@ async function withPhaseTimeout<T>(
);
});
try {
const result = await Promise.race([fn(), timeout]);
return result;
return await Promise.race([phasePromise, timeout]);
} catch (err) {
if (err instanceof Error && err.message.startsWith("phase-timeout:")) {
_danglingPhasePromise = phasePromise;
}
throw err;
} finally {
if (timer !== undefined) clearTimeout(timer);
}
@ -232,6 +247,16 @@ export async function autoLoop(
break;
}
// ── Drain any dangling phase promise before starting new work ──
// Promise.race() on timeout does not cancel the underlying async fn; that
// fn keeps running and may mutate state after the loop has advanced.
// Awaiting its completion here ensures no concurrent state writes.
if (_danglingPhasePromise !== null) {
const dangling = _danglingPhasePromise;
_danglingPhasePromise = null;
try { await dangling; } catch { /* ignore — result is irrelevant */ }
}
try {
// ── Blanket try/catch: one bad iteration must not kill the session
const prefs = deps.loadEffectiveSFPreferences()?.preferences;

View file

@ -1003,6 +1003,32 @@ function migrateSchema(db: DbAdapter): void {
if (currentVersion < 17) {
ensureColumn(db, "tasks", "verification_status", `ALTER TABLE tasks ADD COLUMN verification_status TEXT NOT NULL DEFAULT ''`);
// Backfill verification_status from existing verification_evidence rows so the
// prior-task guard works on databases upgraded mid-project (not just new ones).
db.exec(`
UPDATE tasks
SET verification_status = CASE
WHEN (SELECT COUNT(*) FROM verification_evidence ve
WHERE ve.milestone_id = tasks.milestone_id
AND ve.slice_id = tasks.slice_id
AND ve.task_id = tasks.id) = 0
THEN ''
WHEN (SELECT COUNT(*) FROM verification_evidence ve
WHERE ve.milestone_id = tasks.milestone_id
AND ve.slice_id = tasks.slice_id
AND ve.task_id = tasks.id
AND ve.exit_code != 0) = 0
THEN 'all_pass'
WHEN (SELECT COUNT(*) FROM verification_evidence ve
WHERE ve.milestone_id = tasks.milestone_id
AND ve.slice_id = tasks.slice_id
AND ve.task_id = tasks.id
AND ve.exit_code = 0) > 0
THEN 'partial'
ELSE 'all_fail'
END
WHERE tasks.status IN ('complete', 'done')
`);
db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({
":version": 17,
":applied_at": new Date().toISOString(),