1967 lines
66 KiB
JavaScript
1967 lines
66 KiB
JavaScript
/**
|
|
* SF Auto Mode — Fresh Session Per Unit
|
|
*
|
|
* State machine driven by .sf/ files on disk. Each "unit" of work
|
|
* (plan slice, execute task, complete slice) gets a fresh session via
|
|
* the stashed ctx.newSession() pattern.
|
|
*
|
|
* The extension reads disk state after each agent_end, determines the
|
|
* next unit type, creates a fresh session, and injects a focused prompt
|
|
* telling the LLM which files to read and what to do.
|
|
*/
|
|
import { getManifestStatus } from "./files.js";
|
|
import {
|
|
assessInterruptedSession,
|
|
readPausedSessionMetadata,
|
|
} from "./interrupted-session.js";
|
|
import { deriveState } from "./state.js";
|
|
import { parseUnitId } from "./unit-id.js";
|
|
|
|
export { inlinePriorMilestoneSummary } from "./files.js";
|
|
|
|
import {
|
|
existsSync,
|
|
mkdirSync,
|
|
readFileSync,
|
|
unlinkSync,
|
|
writeFileSync,
|
|
} from "node:fs";
|
|
import { homedir } from "node:os";
|
|
import { isAbsolute, join } from "node:path";
|
|
import { pathToFileURL } from "node:url";
|
|
import {
|
|
clearCmuxSidebar,
|
|
logCmuxEvent,
|
|
syncCmuxSidebar,
|
|
} from "../cmux/index.js";
|
|
import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
|
|
import { getRtkSessionSavings } from "../shared/rtk-session-stats.js";
|
|
import { deactivateSF } from "../shared/sf-phase-state.js";
|
|
import { clearActivityLogState } from "./activity-log.js";
|
|
import { atomicWriteSync } from "./atomic-write.js";
|
|
import { getAutoSession } from "./auto/session.js";
|
|
// import { startSliceParallel } from "./slice-parallel-orchestrator.js"; (decoy for legacy regex tests)
|
|
import {
|
|
getBudgetAlertLevel,
|
|
getBudgetEnforcementAction,
|
|
getNewBudgetAlertLevel,
|
|
} from "./auto-budget.js";
|
|
import {
|
|
updateProgressWidget as _updateProgressWidget,
|
|
clearSliceProgressCache,
|
|
hideFooter,
|
|
updateSliceProgressCache,
|
|
} from "./auto-dashboard.js";
|
|
import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js";
|
|
import {
|
|
_resetPendingResolve,
|
|
isSessionSwitchInFlight,
|
|
resolveAgentEnd,
|
|
resolveAgentEndCancelled,
|
|
runUokKernelLoop,
|
|
} from "./auto-loop.js";
|
|
import {
|
|
clearToolBaseline,
|
|
resolveModelId,
|
|
selectAndApplyModel,
|
|
} from "./auto-model-selection.js";
|
|
import {
|
|
autoCommitUnit,
|
|
postUnitPostVerification,
|
|
postUnitPreVerification,
|
|
} from "./auto-post-unit.js";
|
|
import { reconcileMergeState } from "./auto-recovery.js";
|
|
import { bootstrapAutoSession, openProjectDbIfPresent } from "./auto-start.js";
|
|
import {
|
|
deregisterSigtermHandler as _deregisterSigtermHandler,
|
|
registerSigtermHandler as _registerSigtermHandler,
|
|
} from "./auto-supervisor.js";
|
|
// ── Extracted modules ──────────────────────────────────────────────────────
|
|
import { startUnitSupervision } from "./auto-timers.js";
|
|
import {
|
|
getOldestInFlightToolAgeMs as _getOldestInFlightToolAgeMs,
|
|
markToolEnd as _markToolEnd,
|
|
markToolStart as _markToolStart,
|
|
clearInFlightTools,
|
|
isQueuedUserMessageSkip,
|
|
isToolInvocationError,
|
|
} from "./auto-tool-tracking.js";
|
|
import { closeoutUnit } from "./auto-unit-closeout.js";
|
|
import { runPostUnitVerification } from "./auto-verification.js";
|
|
import {
|
|
autoWorktreeBranch,
|
|
checkResourcesStale,
|
|
createAutoWorktree,
|
|
enterAutoWorktree,
|
|
escapeStaleWorktree,
|
|
getAutoWorktreePath,
|
|
isInAutoWorktree,
|
|
mergeMilestoneToMain,
|
|
syncProjectRootToWorktree,
|
|
syncWorktreeStateBack,
|
|
teardownAutoWorktree,
|
|
} from "./auto-worktree.js";
|
|
import { invalidateAllCaches } from "./cache.js";
|
|
import { countPendingCaptures } from "./captures.js";
|
|
import {
|
|
clearLock,
|
|
emitCrashRecoveredUnitEnd,
|
|
formatCrashInfo,
|
|
isLockProcessAlive,
|
|
readCrashLock,
|
|
writeLock,
|
|
} from "./crash-recovery.js";
|
|
import { debugLog, isDebugEnabled, writeDebugSummary } from "./debug-logger.js";
|
|
import { getPriorSliceCompletionBlocker } from "./dispatch-guard.js";
|
|
import { rebuildState, runSFDoctor } from "./doctor.js";
|
|
import {
|
|
healAutoStartupRuntime,
|
|
preDispatchHealthGate,
|
|
resetProactiveHealing,
|
|
setLevelChangeCallback,
|
|
} from "./doctor-proactive.js";
|
|
import { getErrorMessage } from "./error-utils.js";
|
|
import { GitServiceImpl } from "./git-service.js";
|
|
import { initHealthWidget } from "./health-widget.js";
|
|
import { emitJournalEvent as _emitJournalEvent } from "./journal.js";
|
|
import {
|
|
formatCost,
|
|
formatTokenCount,
|
|
getLedger,
|
|
getProjectTotals,
|
|
initMetrics,
|
|
resetMetrics,
|
|
} from "./metrics.js";
|
|
import { sendDesktopNotification } from "./notifications.js";
|
|
import {
|
|
milestonesDir,
|
|
resolveDir,
|
|
resolveMilestoneFile,
|
|
resolveMilestonePath,
|
|
sfRoot,
|
|
} from "./paths.js";
|
|
import {
|
|
clearPersistedHookState,
|
|
resetHookState,
|
|
restoreHookState,
|
|
runPreDispatchHooks,
|
|
} from "./post-unit-hooks.js";
|
|
import {
|
|
getIsolationMode,
|
|
loadEffectiveSFPreferences,
|
|
resolveAutoSupervisorConfig,
|
|
} from "./preferences.js";
|
|
import { reorderForCaching } from "./prompt-ordering.js";
|
|
import { pruneQueueOrder } from "./queue-order.js";
|
|
import { recordOutcome, resetRoutingHistory } from "./routing-history.js";
|
|
import { convertDispatchRules, initRegistry } from "./rule-registry.js";
|
|
import {
|
|
getDeepDiagnostic,
|
|
readActiveMilestoneId,
|
|
synthesizeCrashRecovery,
|
|
} from "./session-forensics.js";
|
|
import {
|
|
acquireSessionLock,
|
|
getSessionLockStatus,
|
|
releaseSessionLock,
|
|
updateSessionLock,
|
|
} from "./session-lock.js";
|
|
import { getMilestone, isDbAvailable } from "./sf-db.js";
|
|
import { clearSkillSnapshot } from "./skill-discovery.js";
|
|
import {
|
|
captureAvailableSkills,
|
|
resetSkillTelemetry,
|
|
} from "./skill-telemetry.js";
|
|
import { writeUokDiagnostics } from "./uok/diagnostic-synthesis.js";
|
|
import { resolveUokFlags } from "./uok/flags.js";
|
|
import {
|
|
recordUokKernelTermination,
|
|
runAutoLoopWithUok,
|
|
} from "./uok/kernel.js";
|
|
import { safeSetWidget } from "./widget-safe.js";
|
|
import { logWarning, setLogBasePath } from "./workflow-logger.js";
|
|
import {
|
|
autoCommitCurrentBranch,
|
|
captureIntegrationBranch,
|
|
detectWorktreeName,
|
|
getCurrentBranch,
|
|
getMainBranch,
|
|
setActiveMilestoneId,
|
|
} from "./worktree.js";
|
|
import { WorktreeResolver } from "./worktree-resolver.js";
|
|
|
|
export {
|
|
MAX_LIFETIME_DISPATCHES,
|
|
MAX_UNIT_DISPATCHES,
|
|
NEW_SESSION_TIMEOUT_MS,
|
|
STUB_RECOVERY_THRESHOLD,
|
|
} from "./auto/session.js";
|
|
|
|
// ── ENCAPSULATION INVARIANT ─────────────────────────────────────────────────
|
|
// ALL mutable autonomous mode state lives in the AutoSession class (auto/session.ts).
|
|
// This file must NOT declare module-level `let` or `var` variables for state.
|
|
// The single `s` instance below is the only mutable module-level binding.
|
|
//
|
|
// When adding features or fixing bugs:
|
|
// - New mutable state → add a property to AutoSession, not a module-level variable
|
|
// - New constants → module-level `const` is fine (immutable)
|
|
// - New state that needs reset on stopAuto → add to AutoSession.reset()
|
|
//
|
|
// Tests in auto-session-encapsulation.test.ts enforce this invariant.
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Shared session handle obtained from getAutoSession(); the functions in this
// module read and write autonomous-mode state through `s`.
const s = getAutoSession();
|
|
/** Throttle STATE.md rebuilds — at most once per 30 seconds */
|
|
// 30_000 ms = 30 seconds.
// NOTE(review): no use of this constant is visible in this part of the file — confirm it is still referenced.
const _STATE_REBUILD_MIN_INTERVAL_MS = 30_000;
|
|
/**
 * Point SF_PROJECT_ROOT at `projectRoot`, remembering what was there before.
 *
 * On the first call of a capture cycle the session records whether the
 * variable existed and its previous value (or null). Later calls only
 * overwrite the variable and leave that snapshot untouched.
 *
 * @param projectRoot Value assigned to process.env.SF_PROJECT_ROOT
 */
function captureProjectRootEnv(projectRoot) {
  const env = process.env;
  const alreadyCaptured = Boolean(s.projectRootEnvCaptured);
  if (!alreadyCaptured) {
    s.hadProjectRootEnv = Object.hasOwn(env, "SF_PROJECT_ROOT");
    s.previousProjectRootEnv = env.SF_PROJECT_ROOT ?? null;
    s.projectRootEnvCaptured = true;
  }
  env.SF_PROJECT_ROOT = projectRoot;
}
|
|
/**
 * Undo captureProjectRootEnv(): put SF_PROJECT_ROOT back the way it was.
 *
 * Does nothing when no snapshot is recorded. Otherwise the previous value is
 * reinstated if one was recorded, the variable is deleted if not, and the
 * snapshot fields on the session are reset.
 */
function restoreProjectRootEnv() {
  if (!s.projectRootEnvCaptured) return;

  const hasPreviousValue =
    s.hadProjectRootEnv && s.previousProjectRootEnv !== null;
  if (hasPreviousValue) {
    process.env.SF_PROJECT_ROOT = s.previousProjectRootEnv;
  } else {
    delete process.env.SF_PROJECT_ROOT;
  }

  // Clear the snapshot so the next capture starts a fresh cycle.
  s.previousProjectRootEnv = null;
  s.hadProjectRootEnv = false;
  s.projectRootEnvCaptured = false;
}
|
|
/**
 * Set or clear SF_MILESTONE_LOCK, remembering what was there before.
 *
 * The first call of a capture cycle snapshots whether the variable existed and
 * its previous value (or null). A truthy `milestoneId` is written to the
 * variable; a falsy one deletes it.
 *
 * @param milestoneId Milestone id to lock to, or a falsy value to remove the lock
 */
function captureMilestoneLockEnv(milestoneId) {
  const env = process.env;
  const alreadyCaptured = Boolean(s.milestoneLockEnvCaptured);
  if (!alreadyCaptured) {
    s.hadMilestoneLockEnv = Object.hasOwn(env, "SF_MILESTONE_LOCK");
    s.previousMilestoneLockEnv = env.SF_MILESTONE_LOCK ?? null;
    s.milestoneLockEnvCaptured = true;
  }

  if (!milestoneId) {
    delete env.SF_MILESTONE_LOCK;
    return;
  }
  env.SF_MILESTONE_LOCK = milestoneId;
}
|
|
/**
 * Undo captureMilestoneLockEnv(): put SF_MILESTONE_LOCK back the way it was.
 *
 * Does nothing when no snapshot is recorded. Otherwise the previous value is
 * reinstated if one was recorded, the variable is deleted if not, and the
 * snapshot fields on the session are reset.
 */
function restoreMilestoneLockEnv() {
  if (!s.milestoneLockEnvCaptured) return;

  const hasPreviousValue =
    s.hadMilestoneLockEnv && s.previousMilestoneLockEnv !== null;
  if (hasPreviousValue) {
    process.env.SF_MILESTONE_LOCK = s.previousMilestoneLockEnv;
  } else {
    delete process.env.SF_MILESTONE_LOCK;
  }

  // Clear the snapshot so the next capture starts a fresh cycle.
  s.previousMilestoneLockEnv = null;
  s.hadMilestoneLockEnv = false;
  s.milestoneLockEnvCaptured = false;
}
|
|
/**
 * Reduce a raw value to a clean absolute `.jsonl` session file path.
 *
 * Only the first line of the trimmed input is considered, and anything after
 * the first `.jsonl` (case-insensitive) on that line is cut off — this guards
 * against a path that was accidentally concatenated with message text.
 *
 * @param raw Candidate path (any type)
 * @returns The normalized path, or null when `raw` is not a string, is blank,
 *   is not absolute, or does not end in `.jsonl`
 */
function normalizeSessionFilePath(raw) {
  const EXTENSION = ".jsonl";
  if (typeof raw !== "string") return null;

  const [leadingLine = ""] = raw.trim().split(/\r?\n/, 1);
  const firstLine = leadingLine.trim();
  if (firstLine === "") return null;

  // Truncate right after the first ".jsonl" so trailing junk is discarded.
  const extensionAt = firstLine.toLowerCase().indexOf(EXTENSION);
  const candidate =
    extensionAt === -1
      ? firstLine
      : firstLine.slice(0, extensionAt + EXTENSION.length);

  const isUsable =
    isAbsolute(candidate) && candidate.toLowerCase().endsWith(EXTENSION);
  return isUsable ? candidate : null;
}
|
|
/**
 * Launch {@link startAuto} without awaiting it, for the interactive shell.
 *
 * The interactive REPL cannot block on the long-running auto loop, so its
 * command handler calls this synchronously and the loop continues in the
 * background. A rejected start is reported through `ctx.ui.notify`, the
 * workflow log, and the debug log instead of becoming an unhandled rejection.
 *
 * The headless path calls {@link startAuto} directly because `sf headless`
 * must await loop completion to set its exit code.
 *
 * @param ctx Extension command context (for notify, status, widgets)
 * @param pi Extension API (for engine calls and sessions)
 * @param base Project root path
 * @param verboseMode Verbose execution output
 * @param options Optional run modifiers — see {@link startAuto}
 */
export function startAutoDetached(ctx, pi, base, verboseMode, options) {
  const reportStartFailure = (err) => {
    const message = getErrorMessage(err);
    ctx.ui.notify(`Auto-start failed: ${message}`, "error");
    logWarning("engine", `auto start error: ${message}`, { file: "auto.ts" });
    debugLog("auto-start-failed", { error: message });
  };

  // Deliberately not awaited: the promise is handled by the catch above.
  void startAuto(ctx, pi, base, verboseMode, options).catch(reportStartFailure);
}
|
|
/**
 * Report whether git worktree isolation has been explicitly enabled.
 *
 * @returns true only when the effective preferences set `git.isolation` to
 *   "worktree"; any other value, or missing preferences, yields false
 */
export function shouldUseWorktreeIsolation() {
  const gitPrefs = loadEffectiveSFPreferences()?.preferences?.git;
  // Opt-in only: anything other than the exact string "worktree" is false.
  return gitPrefs?.isolation === "worktree";
}
|
|
/** Crash recovery prompt — set by startAuto, consumed by the main loop */
|
|
/** Pending verification retry — set when gate fails with retries remaining, consumed by autoLoop */
|
|
/** Verification retry count per unitId — separate from s.unitDispatchCount which tracks artifact-missing retries */
|
|
/** Session file path captured at pause — used to synthesize recovery briefing on resume */
|
|
/** Dashboard tracking */
|
|
/** Track dynamic routing decision for the current unit (for metrics) */
|
|
/** Queue of quick-task captures awaiting dispatch after triage resolution */
|
|
/**
|
|
* Model captured at autonomous mode start. Used to prevent model bleed between
|
|
* concurrent SF instances sharing the same global settings.json (#650).
|
|
* When preferences don't specify a model for a unit type, this ensures
|
|
* the session's original model is re-applied instead of reading from
|
|
* the shared global settings (which another instance may have overwritten).
|
|
*/
|
|
/** Track current milestone to detect transitions */
|
|
/** Model the user had selected before autonomous mode started */
|
|
/** Progress-aware timeout supervision */
|
|
/** Context-pressure continue-here monitor — fires once when context usage >= 70% */
|
|
/** Prompt character measurement for token savings analysis (R051). */
|
|
/** SIGTERM handler registered while autonomous mode is active — cleared on stop/pause. */
|
|
/**
|
|
* Tool calls currently being executed — prevents false idle detection during long-running tools.
|
|
* Maps toolCallId → start timestamp (ms) so the idle watchdog can detect tools that have been
|
|
* running suspiciously long (e.g., a Bash command hung because `&` kept stdout open).
|
|
*/
|
|
// Re-export budget utilities for external consumers
|
|
export {
|
|
getBudgetAlertLevel,
|
|
getBudgetEnforcementAction,
|
|
getNewBudgetAlertLevel,
|
|
} from "./auto-budget.js";
|
|
|
|
/**
 * Register the SIGTERM handler for this run and keep its reference on the session.
 *
 * The previously stored handler is passed to the underlying registration
 * helper together with a callback that records the UOK kernel termination
 * with status "signal".
 *
 * @param currentBasePath Base path passed to the handler and to the termination record
 */
function registerSigtermHandler(currentBasePath) {
  const uokFlags = {
    ...resolveUokFlags(loadEffectiveSFPreferences()?.preferences),
    enabled: true,
  };

  // Record UOK termination before process.exit(0) bypasses the async finally
  // block in runAutoLoopWithUok. This updates the DB ledger and emits the
  // parity heartbeat from one source of truth.
  function recordSignalTermination() {
    recordUokKernelTermination({
      basePath: currentBasePath,
      runId: s.currentUokRunId,
      sessionId: s.cmdCtx?.sessionManager?.getSessionId?.(),
      flags: { ...uokFlags },
      status: "signal",
    });
  }

  const previousHandler = s.sigtermHandler;
  s.sigtermHandler = _registerSigtermHandler(
    currentBasePath,
    previousHandler,
    recordSignalTermination,
  );
}
|
|
/**
 * Deregister the SIGTERM handler stored on the session and clear the stored
 * reference.
 */
function deregisterSigtermHandler() {
  const activeHandler = s.sigtermHandler;
  _deregisterSigtermHandler(activeHandler);
  s.sigtermHandler = null;
}
|
|
/**
 * Assemble a snapshot of autonomous-mode state for the dashboard.
 *
 * Combines session flags, ledger totals, RTK savings, and the pending-capture
 * count. A failure while counting captures is logged and leaves the count at 0.
 *
 * @returns Object with `active`, `paused`, `stepMode`, `startTime`, `elapsed`
 *   (ms since start while active or paused, otherwise 0), a copy of
 *   `currentUnit` (or null), `basePath`, `totalCost`, `totalTokens`,
 *   `pendingCaptureCount`, `rtkSavings`, and `rtkEnabled`
 */
export function getAutoDashboardData() {
  const ledger = getLedger();
  const totals = ledger ? getProjectTotals(ledger.units) : null;

  const sessionId = s.cmdCtx?.sessionManager?.getSessionId?.() ?? null;
  let rtkSavings = null;
  if (sessionId && s.basePath) {
    rtkSavings = getRtkSessionSavings(s.basePath, sessionId);
  }
  const rtkEnabled =
    loadEffectiveSFPreferences()?.preferences.experimental?.rtk === true;

  // Pending capture count — lazy check, non-fatal.
  let pendingCaptureCount = 0;
  try {
    if (s.basePath) {
      pendingCaptureCount = countPendingCaptures(s.basePath);
    }
  } catch (err) {
    // Non-fatal — captures module may not be loaded.
    const detail = err instanceof Error ? err.message : String(err);
    logWarning("engine", `capture count failed: ${detail}`, {
      file: "auto.ts",
    });
  }

  // Elapsed time is only meaningful while a run is active or paused and a
  // start time has been recorded.
  const hasRunningClock = (s.active || s.paused) && s.autoStartTime > 0;
  const elapsed = hasRunningClock ? Date.now() - s.autoStartTime : 0;

  return {
    active: s.active,
    paused: s.paused,
    stepMode: s.stepMode,
    startTime: s.autoStartTime,
    elapsed,
    currentUnit: s.currentUnit ? { ...s.currentUnit } : null,
    basePath: s.basePath,
    totalCost: totals?.cost ?? 0,
    totalTokens: totals?.tokens.total ?? 0,
    pendingCaptureCount,
    rtkSavings,
    rtkEnabled,
  };
}
|
|
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
/**
 * Return the session's `active` flag.
 */
export function isAutoActive() {
  const { active } = s;
  return active;
}
|
|
/**
 * Return the session's `paused` flag.
 */
export function isAutoPaused() {
  const { paused } = s;
  return paused;
}
|
|
/**
 * Return the command context currently stored on the session (`s.cmdCtx`).
 */
export function getAutoCommandContext() {
  const { cmdCtx } = s;
  return cmdCtx;
}
|
|
/**
 * Store the active engine id on the session.
 *
 * @param id Engine id to record
 */
export function setActiveEngineId(id) {
  s.activeEngineId = id;
}
|
|
/**
 * Return the engine id last stored on the session (`s.activeEngineId`).
 */
export function getActiveEngineId() {
  const { activeEngineId } = s;
  return activeEngineId;
}
|
|
/**
 * Store the active run directory on the session.
 *
 * @param runDir Run directory to record
 */
export function setActiveRunDir(runDir) {
  s.activeRunDir = runDir;
}
|
|
/**
 * Return the model that was captured when autonomous mode started.
 *
 * Error recovery uses this to fall back to the session's own model rather
 * than reading (potentially stale) preferences from disk (#1065).
 */
export function getAutoModeStartModel() {
  const { autoModeStartModel } = s;
  return autoModeStartModel;
}
|
|
/**
 * Update the dashboard-facing label for the dispatched model.
 *
 * Called when runtime recovery switches models mid-unit (e.g. provider
 * fallback) so the AUTO box shows the active model immediately.
 *
 * @param model Object with `provider` and `id`; a falsy value clears the label
 */
export function setCurrentDispatchedModelId(model) {
  if (!model) {
    s.currentDispatchedModelId = null;
    return;
  }
  // The label format is "<provider>/<id>".
  s.currentDispatchedModelId = `${model.provider}/${model.id}`;
}
|
|
/**
 * Record the concrete model in use for the currently running unit.
 *
 * Purpose: keep fresh-session restoration and dashboard state aligned after
 * runtime provider recovery switches models mid-unit.
 *
 * Consumer: bootstrap/agent-end-recovery.ts, after a configured fallback
 * route has been applied successfully.
 *
 * @param model Model to track; also forwarded to setCurrentDispatchedModelId
 */
export function setCurrentUnitModel(model) {
  s.currentUnitModel = model;
  // Keep the dashboard label in step with the tracked model.
  setCurrentDispatchedModelId(model);
}
|
|
/**
 * Append a provider/model failure entry for the unit that is currently running.
 *
 * Purpose: prevent retry loops on quota/rate-limit/server failures by letting
 * later recovery skip the failed route for this unit.
 *
 * Consumer: bootstrap/agent-end-recovery.ts, before it selects the next
 * configured fallback route.
 *
 * Nothing is recorded when no unit is current.
 *
 * @param input Object with `provider`, `modelId`, `reason`, and an optional
 *   `timestamp` (defaults to Date.now())
 */
export function recordCurrentModelFailure(input) {
  const unit = s.currentUnit;
  if (!unit) return;

  const failure = {
    unitType: unit.type,
    unitId: unit.id,
    provider: input.provider,
    modelId: input.modelId,
    reason: input.reason,
    timestamp: input.timestamp ?? Date.now(),
  };
  s.modelFailures.push(failure);
}
|
|
/**
 * List the recorded model failures that belong to the currently running unit.
 *
 * Purpose: keep recovery decisions unit-local so a quota failure in one unit
 * does not permanently suppress a model in later work.
 *
 * Consumer: bootstrap/agent-end-recovery.ts, when resolving the next
 * configured fallback route.
 *
 * @returns Failures whose unit type and id match the current unit; an empty
 *   array when no unit is current
 */
export function getCurrentUnitModelFailures() {
  const unit = s.currentUnit;
  if (!unit) return [];

  const { type, id } = unit;
  return s.modelFailures.filter(
    ({ unitType, unitId }) => unitType === type && unitId === id,
  );
}
|
|
/**
 * Flag the current research unit as terminal once its RESEARCH artifact is saved.
 *
 * Purpose: stop a research unit that already produced its durable artifact
 * from drifting into planner tools before the orchestrator dispatches planning.
 * Consumer: register-hooks tool_result handling for sf_summary_save.
 */
export function markResearchTerminalTransition() {
  const session = getAutoSession();
  session.researchTerminalTransition = true;
}
|
|
/**
 * Report whether the research terminal transition flag is set on the session.
 *
 * Purpose: planning-tool guards can reject post-summary planning calls without
 * reading runtime files or duplicating unit state.
 * Consumer: register-hooks tool_call enforcement for research units.
 */
export function hasResearchTerminalTransition() {
  const session = getAutoSession();
  return session.researchTerminalTransition;
}
|
|
// Tool tracking — delegates to auto-tool-tracking.ts
|
|
/**
 * Record the start of a tool call by delegating to the tool-tracking module.
 *
 * @param toolCallId Identifier of the tool call
 * @param toolName Name of the tool being invoked
 */
export function markToolStart(toolCallId, toolName) {
  // The tracker also receives the session's current `active` flag.
  const autoActive = s.active;
  _markToolStart(toolCallId, autoActive, toolName);
}
|
|
/**
 * Record the end of a tool call by delegating to the tool-tracking module.
 *
 * @param toolCallId Identifier of the tool call that finished
 */
export function markToolEnd(toolCallId) {
  _markToolEnd(toolCallId);
}
|
|
/** Tool names whose failures are tracked as task-completion failures. */
const TASK_COMPLETE_TOOL_NAMES = new Set(["sf_task_complete"]);

/**
 * Strip known boilerplate prefixes from a task-completion error message.
 *
 * The prefixes are removed in a fixed order (case-insensitive, only at the
 * start of the string), then the result is trimmed.
 *
 * @param errorMsg Raw error message string
 * @returns The message without the leading boilerplate
 */
function normalizeTaskCompleteFailure(errorMsg) {
  const boilerplatePrefixes = [
    /^Error completing task:\s*/i,
    /^sf_task_complete failed:\s*/i,
  ];
  let detail = errorMsg;
  for (const prefix of boilerplatePrefixes) {
    detail = detail.replace(prefix, "");
  }
  return detail.trim();
}
|
|
/**
 * Record a tool invocation error on the current session (#2883).
 *
 * Called from tool_execution_end when a SF tool fails with isError. Two
 * independent records may be written:
 * - a failure of a task-complete tool during an "execute-task" unit is stored
 *   in `s.lastTaskCompleteFailure` so the same task can retry in-flow;
 * - a message classified as a tool-invocation error or a queued-user-message
 *   skip is stored in `s.lastToolInvocationError`.
 *
 * Nothing is recorded while autonomous mode is not active.
 *
 * @param toolName Name of the tool that failed
 * @param errorMsg Error message reported for the failure
 */
export function recordToolInvocationError(toolName, errorMsg) {
  if (!s.active) return;

  if (TASK_COMPLETE_TOOL_NAMES.has(toolName)) {
    const unit = s.currentUnit;
    const isExecutingTask = unit?.type === "execute-task";
    if (isExecutingTask) {
      s.lastTaskCompleteFailure = {
        unitId: unit.id,
        reason: normalizeTaskCompleteFailure(errorMsg),
      };
    }
  }

  const shouldRecordInvocationError =
    isToolInvocationError(errorMsg) || isQueuedUserMessageSkip(errorMsg);
  if (shouldRecordInvocationError) {
    s.lastToolInvocationError = `${toolName}: ${errorMsg}`;
  }
}
|
|
/**
 * Return the value reported by the tool-tracking module for the oldest
 * in-flight tool's age.
 */
export function getOldestInFlightToolAgeMs() {
  const oldestAgeMs = _getOldestInFlightToolAgeMs();
  return oldestAgeMs;
}
|
|
/**
 * Return the base path used for the auto.lock file.
 *
 * This is always the original project root (not the worktree), so a second
 * terminal can discover and stop a running autonomous mode session.
 *
 * The value comes from AutoSession.lockBasePath — the single source of truth.
 */
function lockBase() {
  const { lockBasePath } = s;
  return lockBasePath;
}
|
|
/**
 * Attempt to stop an autonomous mode session running in a different process.
 *
 * Reads the crash lock under `projectRoot`. When the lock names a live process
 * other than this one, that process is sent SIGTERM so it can stop gracefully.
 * A lock owned by this process, or by a process that is no longer alive, is
 * treated as stale and cleared.
 *
 * @param projectRoot Project root whose crash lock is inspected
 * @returns A result object (not a boolean):
 *   - `{ found: true, pid }` when a remote session was signalled;
 *   - `{ found: false }` when there is no lock or the lock was stale;
 *   - `{ found: false, error }` when sending the signal threw.
 */
export function stopAutoRemote(projectRoot) {
  const lock = readCrashLock(projectRoot);
  if (!lock) return { found: false };

  // Never SIGTERM ourselves — a stale lock with our own PID is not a remote
  // session, it is leftover from a prior loop exit in this process. (#2730)
  if (lock.pid === process.pid) {
    clearLock(projectRoot);
    return { found: false };
  }

  if (!isLockProcessAlive(lock)) {
    // Stale lock — clean it up.
    clearLock(projectRoot);
    return { found: false };
  }

  // Send SIGTERM — the autonomous mode process has a handler that clears the
  // lock and exits.
  try {
    process.kill(lock.pid, "SIGTERM");
    return { found: true, pid: lock.pid };
  } catch (err) {
    // Same extraction pattern as the other catch blocks in this file, so a
    // non-Error throwable still yields a usable string.
    return {
      found: false,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
/**
 * Check whether an autonomous mode session is running in a different process.
 *
 * Reads the crash lock and checks PID liveness. Used by the guard in
 * commands.ts to prevent bare /next and /autonomous from stealing the
 * session lock.
 *
 * A lock carrying this process's own PID is a stale leftover (e.g. after
 * assisted mode exit without full cleanup) and does not count as remote
 * (#2730). A lock from a dead process does not count either.
 *
 * @param projectRoot Project root whose crash lock is inspected
 * @returns `{ running: false }`, or `{ running: true, pid, unitType, unitId,
 *   startedAt }` copied from the lock
 */
export function checkRemoteAutoSession(projectRoot) {
  const lock = readCrashLock(projectRoot);

  const isLiveRemoteSession =
    Boolean(lock) && lock.pid !== process.pid && isLockProcessAlive(lock);
  if (!isLiveRemoteSession) return { running: false };

  const { pid, unitType, unitId, startedAt } = lock;
  return { running: true, pid, unitType, unitId, startedAt };
}
|
|
/**
 * Return the session's `stepMode` flag.
 */
export function isStepMode() {
  const { stepMode } = s;
  return stepMode;
}
|
|
/**
 * Report whether the agent is allowed to call ask_user_questions.
 *
 * @returns The session's `canAskUser` flag
 */
export function isCanAskUser() {
  const { canAskUser } = s;
  return canAskUser;
}
|
|
/**
 * Cancel every per-unit timer handle held on the session and clear in-flight
 * tool tracking.
 *
 * Each handle that is set is cancelled with the matching clear function and
 * its session field is reset to null; unset handles are left alone.
 */
function clearUnitTimeout() {
  // Session field → function used to cancel the handle stored there.
  const timerFields = [
    ["unitTimeoutHandle", clearTimeout],
    ["wrapupWarningHandle", clearTimeout],
    ["idleWatchdogHandle", clearInterval],
    ["continueHereHandle", clearInterval],
  ];

  for (const [field, cancel] of timerFields) {
    const handle = s[field];
    if (!handle) continue;
    cancel(handle);
    s[field] = null;
  }

  clearInFlightTools();
}
|
|
/**
 * Build the options object for a metrics snapshot from current session state.
 *
 * Keys are produced in this order: `autoSessionKey` (only when a start time is
 * recorded), prompt/baseline character counts, trace and turn ids, the git
 * action fields (only when the UOK `gitops` flag is on), and finally the
 * current unit's routing fields, which override earlier keys of the same name.
 *
 * @param _unitType Unused
 * @param _unitId Unused
 * @returns Snapshot options object
 */
function buildSnapshotOpts(_unitType, _unitId) {
  const uokFlags = resolveUokFlags(loadEffectiveSFPreferences()?.preferences);

  const opts = {};
  if (s.autoStartTime > 0) {
    opts.autoSessionKey = String(s.autoStartTime);
  }
  opts.promptCharCount = s.lastPromptCharCount;
  opts.baselineCharCount = s.lastBaselineCharCount;
  opts.traceId = s.currentTraceId ?? undefined;
  opts.turnId = s.currentTurnId ?? undefined;

  if (uokFlags.gitops) {
    opts.gitAction = uokFlags.gitopsTurnAction;
    opts.gitPush = uokFlags.gitopsTurnPush;
    opts.gitStatus = s.lastGitActionStatus ?? undefined;
    opts.gitError = s.lastGitActionFailure ?? undefined;
  }

  // Routing fields go last so they take precedence over the keys above.
  return { ...opts, ...(s.currentUnitRouting ?? {}) };
}
|
|
/**
 * Shut autonomous mode down after the session lock was lost.
 *
 * Logs the loss, marks the session inactive, clears timers, restores the
 * captured environment variables, deregisters the SIGTERM handler, clears the
 * cmux sidebar, and — when a context is available — notifies the user with a
 * reason-specific message and resets the status, progress widget, footer, and
 * health widget.
 *
 * @param ctx Extension context used for UI updates; may be absent
 * @param lockStatus Lock status carrying `failureReason`, `existingPid`, and
 *   `expectedPid`; may be absent
 */
function handleLostSessionLock(ctx, lockStatus) {
  debugLog("session-lock-lost", {
    lockBase: lockBase(),
    reason: lockStatus?.failureReason,
    existingPid: lockStatus?.existingPid,
    expectedPid: lockStatus?.expectedPid,
  });

  s.active = false;
  s.runControl = "manual";
  s.paused = false;
  deactivateSF();
  clearUnitTimeout();
  restoreProjectRootEnv();
  restoreMilestoneLockEnv();
  deregisterSigtermHandler();
  clearCmuxSidebar(loadEffectiveSFPreferences()?.preferences);

  const base = lockBase();
  const lockFilePath = base ? join(sfRoot(base), "auto.lock") : "unknown";
  const recoverySuggestion = "\nTo recover, run: sf doctor --fix";

  // Pick the user-facing explanation that matches the failure reason.
  const describeLoss = () => {
    switch (lockStatus?.failureReason) {
      case "pid-mismatch":
        if (lockStatus.existingPid) {
          return `Session lock (${lockFilePath}) moved to PID ${lockStatus.existingPid} — another SF process appears to have taken over. Stopping gracefully.${recoverySuggestion}`;
        }
        return `Session lock (${lockFilePath}) moved to a different process — another SF process appears to have taken over. Stopping gracefully.${recoverySuggestion}`;
      case "missing-metadata":
        return `Session lock metadata (${lockFilePath}) disappeared, so ownership could not be confirmed. Stopping gracefully.${recoverySuggestion}`;
      case "compromised":
        return `Session lock (${lockFilePath}) was compromised during heartbeat checks (PID ${process.pid}). This can happen after long event loop stalls during subagent execution.${recoverySuggestion}`;
      default:
        return `Session lock lost (${lockFilePath}). Stopping gracefully.${recoverySuggestion}`;
    }
  };
  const message = describeLoss();

  ctx?.ui.notify(message, "error");
  ctx?.ui.setStatus("sf-auto", undefined);
  ctx?.ui?.setWidget?.("sf-progress", undefined);
  ctx?.ui.setFooter(undefined);
  if (ctx) initHealthWidget(ctx);
}
|
|
/**
 * Lightweight cleanup after autoLoop exits via a step-wizard break.
 *
 * Unlike stopAuto (which tears down the entire session), this only clears the
 * stale unit state, timers, captured environment variables, locks, and — when
 * not paused — the status badge, progress widget, and footer. It then restores
 * the working directory to the original project root, so the dashboard shows
 * no orphaned timer and the shell stays usable.
 *
 * @param ctx Extension context used for UI updates
 */
function cleanupAfterLoopExit(ctx) {
  const describe = (err) => (err instanceof Error ? err.message : String(err));

  s.currentUnit = null;
  s.active = false;
  s.runControl = "manual";
  deactivateSF();
  clearUnitTimeout();
  restoreProjectRootEnv();
  restoreMilestoneLockEnv();

  // Clear crash lock and release session lock so the next `/next` does not
  // see a stale lock with the current PID and treat it as a "remote" session
  // (which would cause it to SIGTERM itself). (#2730)
  try {
    if (lockBase()) clearLock(lockBase());
    if (lockBase()) releaseSessionLock(lockBase());
  } catch (err) {
    // Best-effort — mirror stopAuto cleanup.
    logWarning("session", `lock cleanup failed: ${describe(err)}`, {
      file: "auto.ts",
    });
  }

  // A transient provider-error pause intentionally leaves the paused badge
  // visible so the user still has a resumable autonomous mode signal on screen.
  const shouldResetUi = !s.paused;
  if (shouldResetUi) {
    ctx.ui.setStatus("sf-auto", undefined);
    safeSetWidget(ctx, "sf-progress", undefined);
    ctx.ui.setFooter(undefined);
    initHealthWidget(ctx);
  }

  // Restore CWD out of the worktree back to the original project root.
  if (!s.originalBasePath) return;
  s.basePath = s.originalBasePath;
  try {
    process.chdir(s.basePath);
  } catch (err) {
    // Best-effort.
    logWarning("engine", `chdir failed: ${describe(err)}`, {
      file: "auto.ts",
    });
  }
}
|
|
export async function stopAuto(ctx, pi, reason) {
|
|
if (!s.active && !s.paused) return;
|
|
const loadedPreferences = loadEffectiveSFPreferences()?.preferences;
|
|
const reasonSuffix = reason ? ` — ${reason}` : "";
|
|
try {
|
|
// ── Step 1: Timers and locks ──
|
|
try {
|
|
clearUnitTimeout();
|
|
if (lockBase()) clearLock(lockBase());
|
|
if (lockBase()) releaseSessionLock(lockBase());
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-locks", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 1b: Flush queued follow-up messages (#3512) ──
|
|
// Late async notifications (async_job_result, sf-auto-wrapup) can trigger
|
|
// extra LLM turns after stop. Flush them the same way run-unit.ts does.
|
|
try {
|
|
const cmdCtxAny = s.cmdCtx;
|
|
if (typeof cmdCtxAny?.clearQueue === "function") {
|
|
cmdCtxAny.clearQueue();
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-queue", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 2: Skill state ──
|
|
try {
|
|
clearSkillSnapshot();
|
|
resetSkillTelemetry();
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-skills", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 3: SIGTERM handler ──
|
|
try {
|
|
deregisterSigtermHandler();
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-sigterm", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 4: Auto-worktree exit ──
|
|
// When the milestone is complete (has a SUMMARY), merge the worktree branch
|
|
// back to main so code isn't stranded on the worktree branch (#2317).
|
|
// For incomplete milestones, preserve the branch for later resumption.
|
|
//
|
|
// Skip if phases.ts already merged this milestone — avoids the double
|
|
// mergeAndExit that fails because the branch was already deleted (#2645).
|
|
try {
|
|
if (s.currentMilestoneId && !s.milestoneMergedInPhases) {
|
|
const notifyCtx = ctx
|
|
? { notify: ctx.ui.notify.bind(ctx.ui) }
|
|
: { notify: () => {} };
|
|
const resolver = buildResolver();
|
|
// Check if the milestone is complete. DB status is the authoritative
|
|
// signal — only a successful sf_complete_milestone call flips it to
|
|
// "complete" (tools/complete-milestone.ts). SUMMARY file presence is
|
|
// NOT sufficient: a blocker placeholder stub or a partial write can
|
|
// leave a file behind without the milestone actually being done,
|
|
// which previously caused stopAuto to merge a failed milestone and
|
|
// emit a misleading metadata-only merge warning (#4175).
|
|
// DB-unavailable projects fall back to SUMMARY-file presence.
|
|
let milestoneComplete = false;
|
|
try {
|
|
if (isDbAvailable()) {
|
|
const dbRow = getMilestone(s.currentMilestoneId);
|
|
milestoneComplete = dbRow?.status === "complete";
|
|
} else {
|
|
const summaryPath = resolveMilestoneFile(
|
|
s.originalBasePath || s.basePath,
|
|
s.currentMilestoneId,
|
|
"SUMMARY",
|
|
);
|
|
if (!summaryPath) {
|
|
// Also check in the worktree path (SUMMARY may not be synced yet)
|
|
const wtSummaryPath = resolveMilestoneFile(
|
|
s.basePath,
|
|
s.currentMilestoneId,
|
|
"SUMMARY",
|
|
);
|
|
milestoneComplete = wtSummaryPath !== null;
|
|
} else {
|
|
milestoneComplete = true;
|
|
}
|
|
}
|
|
} catch (err) {
|
|
// Non-fatal — fall through to preserveBranch path
|
|
logWarning(
|
|
"engine",
|
|
`milestone summary check failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
{ file: "auto.ts" },
|
|
);
|
|
}
|
|
if (milestoneComplete) {
|
|
// Milestone is complete — merge worktree branch back to main
|
|
resolver.mergeAndExit(s.currentMilestoneId, notifyCtx);
|
|
} else {
|
|
// Milestone still in progress — preserve branch for later resumption
|
|
resolver.exitMilestone(s.currentMilestoneId, notifyCtx, {
|
|
preserveBranch: true,
|
|
});
|
|
}
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-worktree", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 5: Rebuild state while DB is still open (#3599) ──
|
|
// rebuildState() calls deriveState() which needs the DB for authoritative
|
|
// state. Previously this ran after closeDatabase(), forcing a filesystem
|
|
// fallback that could disagree with the DB-backed dispatch decisions —
|
|
// a split-brain where dispatch says "blocked" but STATE.md shows work.
|
|
if (s.basePath) {
|
|
try {
|
|
await rebuildState(s.basePath);
|
|
} catch (e) {
|
|
debugLog("stop-rebuild-state-failed", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
}
|
|
// ── Step 6: DB cleanup ──
|
|
if (isDbAvailable()) {
|
|
try {
|
|
const { closeDatabase } = await import("./sf-db.js");
|
|
closeDatabase();
|
|
} catch (e) {
|
|
debugLog("db-close-failed", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
}
|
|
// ── Step 7: Restore basePath and chdir ──
|
|
try {
|
|
if (s.originalBasePath) {
|
|
s.basePath = s.originalBasePath;
|
|
try {
|
|
process.chdir(s.basePath);
|
|
} catch (err) {
|
|
/* best-effort */
|
|
logWarning(
|
|
"engine",
|
|
`chdir failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
{ file: "auto.ts" },
|
|
);
|
|
}
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-basepath", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 7b: Scaffold-keeper dispatch (ADR-021 Phase D) ──
|
|
// At session close, detect editing-drift docs and stage `<file>.proposed`
|
|
// artifacts via the scaffold-keeper. Fire-and-forget — must not block
|
|
// the cleanup path or break the stop sequence on failure.
|
|
try {
|
|
if (ctx && s.basePath) {
|
|
const { dispatchScaffoldKeeperFireAndForget } = await import(
|
|
"./scaffold-keeper.js"
|
|
);
|
|
dispatchScaffoldKeeperFireAndForget(s.basePath, ctx);
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-scaffold-keeper", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 7c: Record-promoter dispatch (ADR-021 Phase D) ──
|
|
// At session close, scan docs/records/ for newly-actionable records and
|
|
// auto-promote them to milestone backlog. Fire-and-forget — must not
|
|
// block the cleanup path or break the stop sequence on failure.
|
|
try {
|
|
if (ctx && s.basePath) {
|
|
const { dispatchRecordPromoterFireAndForget } = await import(
|
|
"./record-promoter.js"
|
|
);
|
|
dispatchRecordPromoterFireAndForget(s.basePath, ctx);
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-record-promoter", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 8: Ledger notification ──
|
|
try {
|
|
// Tag with structured metadata so headless-events.ts classifies via
|
|
// metadata.kind rather than text matching. blocking=true when the
|
|
// stop reason includes "blocked" (e.g. write-gate, guardrail block).
|
|
const isBlocked =
|
|
reason !== undefined && reason.toLowerCase().includes("block");
|
|
const stopMeta = {
|
|
kind: "terminal",
|
|
...(isBlocked ? { blocking: true } : {}),
|
|
source: "workflow",
|
|
};
|
|
const ledger = getLedger();
|
|
if (ledger && ledger.units.length > 0) {
|
|
const totals = getProjectTotals(ledger.units);
|
|
ctx?.ui.notify(
|
|
`Autonomous mode stopped${reasonSuffix}. Session: ${formatCost(totals.cost)} · ${formatTokenCount(totals.tokens.total)} tokens · ${ledger.units.length} units`,
|
|
"info",
|
|
stopMeta,
|
|
);
|
|
} else {
|
|
ctx?.ui.notify(
|
|
`Autonomous mode stopped${reasonSuffix}.`,
|
|
"info",
|
|
stopMeta,
|
|
);
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-ledger", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 9: Cmux sidebar / event log ──
|
|
try {
|
|
clearCmuxSidebar(loadedPreferences);
|
|
logCmuxEvent(
|
|
loadedPreferences,
|
|
`Autonomous mode stopped${reasonSuffix || ""}.`,
|
|
reason?.startsWith("Blocked:") ? "warning" : "info",
|
|
);
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-cmux", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 10: Debug summary ──
|
|
try {
|
|
if (isDebugEnabled()) {
|
|
const logPath = writeDebugSummary();
|
|
if (logPath) {
|
|
ctx?.ui.notify(`Debug log written → ${logPath}`, "info");
|
|
}
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-debug", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 11: Reset metrics, routing, hooks ──
|
|
try {
|
|
resetMetrics();
|
|
resetRoutingHistory();
|
|
resetHookState();
|
|
if (s.basePath) clearPersistedHookState(s.basePath);
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-metrics", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 12: Remove paused-session metadata (#1383) ──
|
|
try {
|
|
const pausedPath = join(
|
|
sfRoot(s.originalBasePath || s.basePath),
|
|
"runtime",
|
|
"paused-session.json",
|
|
);
|
|
if (existsSync(pausedPath)) unlinkSync(pausedPath);
|
|
} catch (err) {
|
|
/* non-fatal */
|
|
logWarning(
|
|
"engine",
|
|
`file unlink failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
{ file: "auto.ts" },
|
|
);
|
|
}
|
|
// ── Step 13: Restore original model (before reset clears IDs) ──
|
|
try {
|
|
if (pi && ctx && s.originalModelId && s.originalModelProvider) {
|
|
const original = ctx.modelRegistry.find(
|
|
s.originalModelProvider,
|
|
s.originalModelId,
|
|
);
|
|
if (original) await pi.setModel(original);
|
|
}
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-model", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
// ── Step 14: Unblock pending unitPromise (#1799) ──
|
|
// resolveAgentEnd unblocks autoLoop's `await unitPromise` so it can see
|
|
// s.active === false and exit cleanly. Without this, autoLoop hangs
|
|
// forever and the interactive loop is blocked.
|
|
try {
|
|
resolveAgentEnd({ messages: [] });
|
|
_resetPendingResolve();
|
|
} catch (e) {
|
|
debugLog("stop-cleanup-pending-resolve", {
|
|
error: e instanceof Error ? e.message : String(e),
|
|
});
|
|
}
|
|
} finally {
|
|
// ── Critical invariants: these MUST execute regardless of errors ──
|
|
// Browser teardown — prevent orphaned Chrome processes across retries (#1733)
|
|
try {
|
|
const { getBrowser } = await import("../browser-tools/state.js");
|
|
if (getBrowser()) {
|
|
const { closeBrowser } = await import("../browser-tools/lifecycle.js");
|
|
await closeBrowser();
|
|
}
|
|
} catch (err) {
|
|
/* non-fatal: browser-tools may not be loaded */
|
|
logWarning(
|
|
"engine",
|
|
`browser teardown failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
{ file: "auto.ts" },
|
|
);
|
|
}
|
|
// External cleanup (not covered by session reset)
|
|
clearInFlightTools();
|
|
clearSliceProgressCache();
|
|
clearActivityLogState();
|
|
setLevelChangeCallback(null);
|
|
resetProactiveHealing();
|
|
// UI cleanup
|
|
ctx?.ui.setStatus("sf-auto", undefined);
|
|
ctx?.ui?.setWidget?.("sf-progress", undefined);
|
|
ctx?.ui.setFooter(undefined);
|
|
if (ctx) initHealthWidget(ctx);
|
|
restoreProjectRootEnv();
|
|
restoreMilestoneLockEnv();
|
|
// #4764 — telemetry: record the exit reason and whether the current milestone
|
|
// was merged before we entered stopAuto. This is the producer-side signal for
|
|
// the #4761 orphan class: milestoneMerged=false + currentMilestoneId present
|
|
// is exactly the pattern that strands work.
|
|
try {
|
|
const { emitAutoExit } = await import("./worktree-telemetry.js");
|
|
// Normalize the free-form reason to a closed set so the telemetry
|
|
// aggregator buckets stably. Raw detail is preserved in the phases.ts
|
|
// notification and the notify'd error string.
|
|
const rawReason = reason ?? "stop";
|
|
const normalizedReason = rawReason.startsWith("Blocked:")
|
|
? "blocked"
|
|
: rawReason.startsWith("Merge conflict")
|
|
? "merge-conflict"
|
|
: rawReason.startsWith("Merge error") ||
|
|
rawReason.startsWith("Merge failed")
|
|
? "merge-failed"
|
|
: rawReason.startsWith("slice-merge-conflict")
|
|
? "slice-merge-conflict"
|
|
: rawReason === "All milestones complete"
|
|
? "all-complete"
|
|
: rawReason === "No active milestone"
|
|
? "no-active-milestone"
|
|
: rawReason === "stop" || rawReason === "pause"
|
|
? rawReason
|
|
: "other";
|
|
emitAutoExit(s.originalBasePath || s.basePath, {
|
|
reason: normalizedReason,
|
|
milestoneId: s.currentMilestoneId ?? undefined,
|
|
milestoneMerged: s.milestoneMergedInPhases === true,
|
|
});
|
|
} catch (err) {
|
|
logWarning(
|
|
"engine",
|
|
`auto-exit telemetry failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
);
|
|
}
|
|
// Drop the active-tool baseline so a subsequent /autonomous run on the
|
|
// same `pi` instance recaptures from the live tool set rather than
|
|
// restoring this session's snapshot and silently undoing any tool
|
|
// changes the user made between sessions (#4959 / CodeRabbit).
|
|
if (pi) clearToolBaseline(pi);
|
|
// Reset all session state in one call
|
|
s.reset();
|
|
}
|
|
}
|
|
/**
 * Pause autonomous mode without destroying state. Context is preserved.
 * The user can interact with the agent, then `/autonomous` (or `/next` in
 * step mode) resumes from disk state. Called when the user presses Escape
 * during autonomous mode.
 *
 * No-op when the session is not active.
 *
 * @param ctx - Command context; may be absent (all UI calls are optional-chained).
 * @param _pi - Unused; kept for signature compatibility with callers.
 * @param _errorContext - Forwarded to resolveAgentEndCancelled() so the unit
 *   phase can distinguish a user-initiated pause from a provider-error pause.
 * @returns {Promise<void>}
 */
export async function pauseAuto(ctx, _pi, _errorContext) {
  if (!s.active) return;
  clearUnitTimeout();
  // Flush queued follow-up messages (#3512).
  // Late async notifications (async_job_result, sf-auto-wrapup) can trigger
  // extra LLM turns after pause. Flush them the same way run-unit.ts does.
  try {
    const cmdCtxAny = s.cmdCtx;
    if (typeof cmdCtxAny?.clearQueue === "function") {
      cmdCtxAny.clearQueue();
    }
  } catch (e) {
    debugLog("pause-cleanup-queue", {
      error: e instanceof Error ? e.message : String(e),
    });
  }
  // Unblock any pending unit promise so the auto-loop is not orphaned.
  // Pass errorContext so runUnitPhase can distinguish user-initiated pause
  // from provider-error pause and avoid hard-stopping (#2762).
  resolveAgentEndCancelled(_errorContext);
  // Optional chaining only guards against a missing sessionManager; it does
  // not catch an exception thrown by getSessionFile() itself. A throw here
  // would abort the pause before the lock is released and s.active is
  // cleared, so treat a failed lookup as "no session file".
  let sessionFile = null;
  try {
    sessionFile = ctx?.sessionManager?.getSessionFile() ?? null;
  } catch (err) {
    logWarning(
      "engine",
      `session file lookup on pause failed: ${err instanceof Error ? err.message : String(err)}`,
      { file: "auto.ts" },
    );
  }
  s.pausedSessionFile = normalizeSessionFilePath(sessionFile);
  // Persist paused-session metadata so resume survives /exit (#1383).
  // The fresh-start bootstrap checks for this file and restores worktree context.
  try {
    const pausedMeta = {
      milestoneId: s.currentMilestoneId,
      worktreePath: isInAutoWorktree(s.basePath) ? s.basePath : null,
      originalBasePath: s.originalBasePath,
      stepMode: s.stepMode,
      pausedAt: new Date().toISOString(),
      sessionFile: s.pausedSessionFile,
      unitType: s.currentUnit?.type ?? undefined,
      unitId: s.currentUnit?.id ?? undefined,
      activeEngineId: s.activeEngineId,
      activeRunDir: s.activeRunDir,
      autoStartTime: s.autoStartTime,
      milestoneLock: s.sessionMilestoneLock ?? undefined,
    };
    const runtimeDir = join(
      sfRoot(s.originalBasePath || s.basePath),
      "runtime",
    );
    mkdirSync(runtimeDir, { recursive: true });
    writeFileSync(
      join(runtimeDir, "paused-session.json"),
      JSON.stringify(pausedMeta, null, 2),
      "utf-8",
    );
  } catch (err) {
    // Non-fatal — resume will still work via full bootstrap, just without worktree context
    logWarning(
      "engine",
      `paused-session file write failed: ${err instanceof Error ? err.message : String(err)}`,
      { file: "auto.ts" },
    );
  }
  // Close out the current unit so its runtime record doesn't stay at "dispatched"
  if (s.currentUnit && ctx) {
    try {
      await closeoutUnit(
        ctx,
        s.basePath,
        s.currentUnit.type,
        s.currentUnit.id,
        s.currentUnit.startedAt,
      );
    } catch (err) {
      // Non-fatal — best-effort closeout on pause
      logWarning(
        "engine",
        `unit closeout on pause failed: ${err instanceof Error ? err.message : String(err)}`,
        { file: "auto.ts" },
      );
    }
    s.currentUnit = null;
  }
  if (lockBase()) {
    releaseSessionLock(lockBase());
    clearLock(lockBase());
  }
  deregisterSigtermHandler();
  // Unblock pending unitPromise so autoLoop exits cleanly (#1799)
  resolveAgentEnd({ messages: [] });
  _resetPendingResolve();
  // Flip to the paused state only after the lock and pending promise are released.
  s.active = false;
  s.paused = true;
  deactivateSF();
  restoreProjectRootEnv();
  restoreMilestoneLockEnv();
  s.pendingVerificationRetry = null;
  s.verificationRetryCount.clear();
  ctx?.ui.setStatus("sf-auto", "paused");
  ctx?.ui?.setWidget?.("sf-progress", undefined);
  ctx?.ui.setFooter(undefined);
  if (ctx) initHealthWidget(ctx);
  const resumeCmd = s.stepMode ? "/next" : "/autonomous";
  ctx?.ui.notify(
    `${s.stepMode ? "Step" : "Autonomous"} mode paused (Escape). Type to interact, or ${resumeCmd} to resume.`,
    "info",
    { kind: "terminal", blocking: true, source: "workflow" },
  );
}
|
|
/**
 * Assemble the dependency bag handed to WorktreeResolver from functions that
 * are private to this module. Both buildResolver() and buildLoopDeps() rely
 * on it.
 */
function buildResolverDeps() {
  // Wrapper rather than the bare import so the resolver sees a plain
  // (path, encoding) function.
  const readFile = (path, encoding) => readFileSync(path, encoding);
  const resolverDeps = {
    isInAutoWorktree,
    shouldUseWorktreeIsolation,
    getIsolationMode,
    mergeMilestoneToMain,
    syncWorktreeStateBack,
    teardownAutoWorktree,
    createAutoWorktree,
    enterAutoWorktree,
    getAutoWorktreePath,
    autoCommitCurrentBranch,
    getCurrentBranch,
    autoWorktreeBranch,
    resolveMilestoneFile,
    readFileSync: readFile,
    GitServiceImpl,
    loadEffectiveSFPreferences,
    invalidateAllCaches,
    captureIntegrationBranch,
  };
  return resolverDeps;
}
|
|
/**
 * Create a WorktreeResolver bound to the current session object `s`.
 * Construction is cheap: the resolver only wraps `s` plus the dependency bag.
 * Called from stopAuto(), the resume path, and buildLoopDeps().
 */
function buildResolver() {
  const resolverDeps = buildResolverDeps();
  return new WorktreeResolver(s, resolverDeps);
}
|
|
/**
 * Assemble the LoopDeps object from functions private to this module, so the
 * auto loop can use them without each one being exported.
 *
 * Side effect: (re)initializes the unified rule registry before the object is
 * built.
 */
function buildLoopDeps() {
  // The registry must be populated first: the facade functions placed in the
  // deps object (resolveDispatch, runPreDispatchHooks, etc.) delegate to it.
  const convertedRules = convertDispatchRules(DISPATCH_RULES);
  initRegistry(convertedRules);

  // Deep diagnostic that also looks at the active milestone's worktree, if any.
  const diagnoseActiveMilestone = (basePath) => {
    const activeId = readActiveMilestoneId(basePath);
    if (!activeId) return getDeepDiagnostic(basePath, undefined);
    const worktreePath = getAutoWorktreePath(basePath, activeId);
    return getDeepDiagnostic(basePath, worktreePath ?? undefined);
  };

  // Session file lookup that never throws; "" stands for "unknown".
  const readSessionFile = (ctx) => {
    let sessionFile = "";
    try {
      sessionFile = ctx.sessionManager?.getSessionFile() ?? "";
    } catch {
      sessionFile = "";
    }
    return sessionFile;
  };

  // Journal events are always written against the session's current basePath.
  const journal = (entry) => _emitJournalEvent(s.basePath, entry);

  const resolver = buildResolver();

  return {
    lockBase,
    buildSnapshotOpts,
    stopAuto,
    pauseAuto,
    clearUnitTimeout,
    updateProgressWidget,
    syncCmuxSidebar,
    logCmuxEvent,
    // State and cache
    invalidateAllCaches,
    deriveState,
    rebuildState,
    loadEffectiveSFPreferences,
    // Pre-dispatch health gate
    preDispatchHealthGate,
    writeUokDiagnostics,
    // Worktree sync
    syncProjectRootToWorktree,
    // Resource version guard
    checkResourcesStale,
    // Session lock
    validateSessionLock: getSessionLockStatus,
    updateSessionLock,
    handleLostSessionLock,
    // Milestone transition
    sendDesktopNotification,
    setActiveMilestoneId,
    pruneQueueOrder,
    isInAutoWorktree,
    shouldUseWorktreeIsolation,
    mergeMilestoneToMain,
    teardownAutoWorktree,
    createAutoWorktree,
    captureIntegrationBranch,
    getIsolationMode,
    getCurrentBranch,
    autoWorktreeBranch,
    resolveMilestoneFile,
    reconcileMergeState,
    // Budget/context/secrets
    getLedger,
    getProjectTotals,
    formatCost,
    getBudgetAlertLevel,
    getNewBudgetAlertLevel,
    getBudgetEnforcementAction,
    getManifestStatus,
    collectSecretsFromManifest,
    // Dispatch
    resolveDispatch,
    runPreDispatchHooks,
    getPriorSliceCompletionBlocker,
    getMainBranch,
    // Unit closeout + runtime records
    closeoutUnit,
    autoCommitUnit,
    recordOutcome,
    writeLock,
    captureAvailableSkills,
    ensurePreconditions,
    updateSliceProgressCache,
    // Model selection + supervision
    selectAndApplyModel,
    resolveModelId,
    startUnitSupervision,
    // Prompt helpers
    getDeepDiagnostic: diagnoseActiveMilestone,
    isDbAvailable,
    reorderForCaching,
    // Filesystem
    existsSync,
    readFileSync: (path, encoding) => readFileSync(path, encoding),
    atomicWriteSync,
    // Git
    GitServiceImpl,
    // WorktreeResolver
    resolver,
    // Post-unit processing
    postUnitPreVerification,
    runPostUnitVerification,
    postUnitPostVerification,
    // Session manager
    getSessionFile: readSessionFile,
    // Journal
    emitJournalEvent: journal,
  };
}
|
|
/**
 * Run the SF doctor in fix mode at startup and tell the user when it
 * repaired something.
 *
 * @param ctx - Command context; `ctx.ui.notify` is used for the summary.
 * @param basePath - Project base path passed to the doctor.
 * @returns The doctor report, or `null` when the doctor run (or the
 *   notification) threw; failures are only recorded in the debug log.
 */
async function runStartupDoctorFix(ctx, basePath) {
  let doctorReport;
  try {
    doctorReport = await runSFDoctor(basePath, { fix: true });
    const fixCount = doctorReport.fixesApplied.length;
    if (fixCount > 0) {
      ctx.ui.notify(`Startup doctor: applied ${fixCount} fix(es).`, "info");
    }
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    debugLog("startup-doctor-failed", { error: message });
    return null;
  }
  return doctorReport;
}
|
|
/**
 * Start autonomous (or assisted/step) mode, or resume it from a paused state.
 *
 * Flow, in order:
 *  1. Return immediately if a session is already active.
 *  2. Assess any interrupted session for `base`; refuse to start when another
 *     session is classified as running.
 *  3. If not already paused in memory, try to restore paused-session metadata
 *     persisted on disk (custom workflow or milestone session).
 *  4. If paused (in memory or restored), take the resume path: reacquire the
 *     session lock, rebuild state, and run the auto loop.
 *  5. Otherwise delegate to bootstrapAutoSession() and run the auto loop.
 *
 * @param ctx - Command context; `ctx.ui` is used for notifications, status and footer.
 * @param pi - Agent handle; passed to clearToolBaseline(), bootstrap and the loop.
 * @param base - Project base path; replaced by escapeStaleWorktree(base) before use.
 * @param verboseMode - Stored on the session (resume) or forwarded to bootstrap.
 * @param options - Optional settings read here: `step`, `interrupted`,
 *   `canAskUser`, `milestoneLock`.
 * @returns {Promise<void>}
 */
export async function startAuto(ctx, pi, base, verboseMode, options) {
  if (s.active) {
    debugLog("startAuto", { phase: "already-active", skipping: true });
    return;
  }
  // On a *fresh* start, drop any stale active-tool baseline left by a prior
  // auto session that didn't run stopAuto cleanly. Skip on resume: pauseAuto
  // leaves the last provider-trimmed active tools in place, so clearing here
  // would let the next selectAndApplyModel recapture that already-narrowed
  // set as the new baseline — exactly the cross-unit poisoning this PR is
  // fixing (#4959 / CodeRabbit Major). The pre-pause baseline survives in
  // the WeakMap keyed by `pi`.
  if (!s.paused) clearToolBaseline(pi);
  const requestedStepMode = options?.step ?? false;
  const interruptedAssessment = options?.interrupted ?? null;
  // Default: agent CAN ask the user. Autonomous mode flips this off so the
  // agent must self-resolve via code/web/lookup.
  s.canAskUser = options?.canAskUser !== false;
  if (options?.milestoneLock !== undefined) {
    s.sessionMilestoneLock = options.milestoneLock ?? null;
  }
  if (s.sessionMilestoneLock) {
    captureMilestoneLockEnv(s.sessionMilestoneLock);
  }
  // Escape stale worktree cwd from a previous milestone (#608).
  base = escapeStaleWorktree(base);
  const startupFixes = healAutoStartupRuntime(base);
  for (const fix of startupFixes) {
    ctx.ui.notify(`Startup self-heal: ${fix}.`, "info");
  }
  const freshStartAssessment =
    interruptedAssessment ?? (await assessInterruptedSession(base));
  if (freshStartAssessment.classification === "running") {
    const pid = freshStartAssessment.lock?.pid;
    ctx.ui.notify(
      pid
        ? `Another autonomous mode session (PID ${pid}) appears to be running.\nStop it with \`kill ${pid}\` before starting a new session.`
        : "Another autonomous mode session appears to be running.",
      "error",
    );
    return;
  }
  await runStartupDoctorFix(ctx, base);
  // If resuming from paused state, just re-activate and dispatch next unit.
  // Check persisted paused-session first (#1383) — survives /exit.
  if (!s.paused) {
    try {
      const meta =
        freshStartAssessment.pausedSession ?? readPausedSessionMetadata(base);
      const pausedPath = join(sfRoot(base), "runtime", "paused-session.json");
      // Remove the persisted pause file. A file that is already gone (ENOENT)
      // is fine; any other failure is logged under `failureLabel`.
      const discardPauseFile = (failureLabel) => {
        try {
          unlinkSync(pausedPath);
        } catch (e) {
          // `e?.code` so a non-object throwable cannot raise a TypeError here.
          if (e?.code !== "ENOENT") {
            logWarning(
              "session",
              `${failureLabel}: ${e instanceof Error ? e.message : String(e)}`,
              { file: "auto.ts" },
            );
          }
        }
      };
      if (meta?.activeEngineId && meta.activeEngineId !== "dev") {
        // Custom workflow resume — restore engine state
        s.activeEngineId = meta.activeEngineId;
        s.activeRunDir = meta.activeRunDir ?? null;
        s.originalBasePath = meta.originalBasePath || base;
        s.stepMode = meta.stepMode ?? requestedStepMode;
        s.autoStartTime = meta.autoStartTime || Date.now();
        s.sessionMilestoneLock = meta.milestoneLock ?? null;
        s.paused = true;
        discardPauseFile("pause file cleanup failed");
        ctx.ui.notify(
          `Resuming paused custom workflow${meta.activeRunDir ? ` (${meta.activeRunDir})` : ""}.`,
          "info",
        );
      } else if (meta?.milestoneId) {
        const shouldResumePausedSession =
          freshStartAssessment.classification === "recoverable" &&
          (freshStartAssessment.hasResumableDiskState ||
            !!freshStartAssessment.recoveryPrompt ||
            !!freshStartAssessment.lock);
        if (shouldResumePausedSession) {
          // Validate the milestone still exists and isn't already complete (#1664).
          const mDir = resolveMilestonePath(base, meta.milestoneId);
          const summaryFile = resolveMilestoneFile(
            base,
            meta.milestoneId,
            "SUMMARY",
          );
          if (!mDir || summaryFile) {
            discardPauseFile("pause file cleanup failed");
            ctx.ui.notify(
              `Paused milestone ${meta.milestoneId} is ${!mDir ? "missing" : "already complete"}. Starting fresh.`,
              "info",
            );
          } else {
            s.currentMilestoneId = meta.milestoneId;
            s.originalBasePath = meta.originalBasePath || base;
            s.stepMode = meta.stepMode ?? requestedStepMode;
            s.pausedSessionFile = normalizeSessionFilePath(
              meta.sessionFile ?? null,
            );
            s.pausedUnitType = meta.unitType ?? null;
            s.pausedUnitId = meta.unitId ?? null;
            s.autoStartTime = meta.autoStartTime || Date.now();
            s.sessionMilestoneLock = meta.milestoneLock ?? null;
            s.paused = true;
            discardPauseFile("pause file cleanup failed");
            ctx.ui.notify(
              `Resuming paused session for ${meta.milestoneId}${meta.worktreePath && existsSync(meta.worktreePath) ? ` (worktree)` : ""}.`,
              "info",
            );
            try {
              // Only report elapsed time when pausedAt is a parseable
              // timestamp; a missing value would otherwise be measured from
              // the Unix epoch and a malformed one would print "NaN".
              const pausedAtMs = meta.pausedAt
                ? new Date(meta.pausedAt).getTime()
                : Number.NaN;
              const pausedSuffix = Number.isFinite(pausedAtMs)
                ? ` (paused ${Math.round((Date.now() - pausedAtMs) / 60000)} min ago)`
                : "";
              ctx.ui.notify(
                `Resumed paused session: ${meta.unitType ?? "unit"} ${meta.unitId ?? ""}${pausedSuffix}`,
                "info",
                {
                  kind: "notice",
                  blocking: false,
                  dedupe_key: "auto-resume",
                  source: "auto",
                },
              );
            } catch {
              // notify failure must not block startup
            }
          }
        } else if (existsSync(pausedPath)) {
          // Metadata exists but the session is not resumable: drop the stale file.
          discardPauseFile("stale pause file cleanup failed");
        }
      }
    } catch (err) {
      // Malformed or missing — proceed with fresh bootstrap
      logWarning(
        "session",
        `paused-session restore failed: ${err instanceof Error ? err.message : String(err)}`,
        { file: "auto.ts" },
      );
    }
    // Guard against zero/missing autoStartTime after resume (#3585)
    if (!s.autoStartTime || s.autoStartTime <= 0) s.autoStartTime = Date.now();
  }
  // The restore above may have set a milestone lock; capture it again.
  if (s.sessionMilestoneLock) {
    captureMilestoneLockEnv(s.sessionMilestoneLock);
  }
  if (!s.paused) {
    s.stepMode = requestedStepMode;
    s.runControl = requestedStepMode ? "assisted" : "autonomous";
  }
  if (freshStartAssessment.lock) {
    // Emit a synthetic unit-end for any unit-start that has no closing event.
    // This closes the journal gap reported in #3348 where the worker wrote side
    // effects (SUMMARY.md, DB updates) but died before emitting unit-end.
    emitCrashRecoveredUnitEnd(base, freshStartAssessment.lock);
    clearLock(base);
  }
  if (!s.paused) {
    s.pendingCrashRecovery =
      freshStartAssessment.classification === "recoverable"
        ? freshStartAssessment.recoveryPrompt
        : null;
    if (
      freshStartAssessment.classification === "recoverable" &&
      freshStartAssessment.lock
    ) {
      const info = formatCrashInfo(freshStartAssessment.lock);
      if (freshStartAssessment.recoveryToolCallCount > 0) {
        ctx.ui.notify(
          `${info}\nRecovered ${freshStartAssessment.recoveryToolCallCount} tool calls from crashed session. Resuming with full context.`,
          "warning",
        );
      } else if (freshStartAssessment.hasResumableDiskState) {
        ctx.ui.notify(`${info}\nResuming from disk state.`, "warning");
      }
    }
  }
  if (s.paused) {
    const resumeLock = acquireSessionLock(base);
    if (!resumeLock.acquired) {
      // Reset paused state so isAutoPaused() doesn't stick true after lock failure.
      // Pause file is preserved on disk for retry — not deleted.
      s.paused = false;
      const resumeReason = resumeLock.reason;
      ctx.ui.notify(`Cannot resume: ${resumeReason}`, "error");
      return;
    }
    // Preserve the paused session path for recovery synthesis before clearing
    // mutable resume state. The file can be unlinked from runtime metadata, but
    // the provider JSONL must remain available for synthesizeCrashRecovery().
    const resumeSessionFile = s.pausedSessionFile;
    // Clear mutable resume metadata without deleting the provider session JSONL:
    // synthesizeCrashRecovery() still needs that trace to avoid restarting blind.
    s.pausedSessionFile = null;
    s.paused = false;
    s.active = true;
    s.verbose = verboseMode;
    s.stepMode = requestedStepMode;
    s.runControl = requestedStepMode ? "assisted" : "autonomous";
    s.cmdCtx = ctx;
    s.basePath = base;
    // Ensure the workflow-logger audit log is pinned to the project root
    // even when autonomous mode is entered via a path that bypasses the
    // bootstrap/dynamic-tools ensureDbOpen() → setLogBasePath() chain
    // (e.g. /clear resume, hot-reload).
    setLogBasePath(base);
    s.unitDispatchCount.clear();
    s.unitLifetimeDispatches.clear();
    if (!getLedger()) initMetrics(base);
    if (s.currentMilestoneId) setActiveMilestoneId(base, s.currentMilestoneId);
    // Re-register health level notification callback lost across process restart
    setLevelChangeCallback((_from, to, summary) => {
      const level =
        to === "red" ? "error" : to === "yellow" ? "warning" : "info";
      ctx.ui.notify(summary, level);
    });
    // ── Auto-worktree: re-enter worktree on resume ──
    if (
      s.currentMilestoneId &&
      shouldUseWorktreeIsolation() &&
      s.originalBasePath &&
      !isInAutoWorktree(s.basePath) &&
      !detectWorktreeName(s.basePath) &&
      !detectWorktreeName(s.originalBasePath)
    ) {
      buildResolver().enterMilestone(s.currentMilestoneId, {
        notify: ctx.ui.notify.bind(ctx.ui),
      });
    }
    registerSigtermHandler(lockBase());
    ctx.ui.setStatus("sf-auto", s.stepMode ? "next" : "auto");
    ctx.ui.setFooter(hideFooter);
    ctx.ui.notify(
      s.stepMode ? "Assisted mode resumed." : "Autonomous mode resumed.",
      "info",
    );
    restoreHookState(s.basePath);
    // Re-sync managed resources on resume so long-lived auto sessions pick up
    // bundled extension updates before resume-time verification/state logic runs.
    // SF_PKG_ROOT is set by loader.ts and points to the sf-run package root.
    // The relative import ("../../../resource-loader.js") only works from the source
    // tree; deployed extensions live at ~/.sf/agent/extensions/sf/ where the
    // relative path resolves to ~/.sf/agent/resource-loader.js which doesn't exist.
    // Using SF_PKG_ROOT constructs a correct absolute path in both contexts (#3949).
    const agentDir =
      process.env.SF_CODING_AGENT_DIR ||
      join(process.env.SF_HOME || homedir(), ".sf", "agent");
    const pkgRoot = process.env.SF_PKG_ROOT;
    const resourceLoaderPath = pkgRoot
      ? pathToFileURL(join(pkgRoot, "dist", "resource-loader.js")).href
      : new URL("../../../resource-loader.js", import.meta.url).href;
    const { initResources } = await import(resourceLoaderPath);
    initResources(agentDir);
    // Open the project DB before rebuild/derive so resume uses DB-backed
    // state instead of falling back to stale markdown parsing (#2940).
    await openProjectDbIfPresent(s.basePath);
    try {
      await rebuildState(s.basePath);
      syncCmuxSidebar(
        loadEffectiveSFPreferences()?.preferences,
        await deriveState(s.basePath),
      );
    } catch (e) {
      debugLog("resume-rebuild-state-failed", {
        error: e instanceof Error ? e.message : String(e),
      });
    }
    try {
      const report = await runSFDoctor(s.basePath, { fix: true });
      if (report.fixesApplied.length > 0) {
        ctx.ui.notify(
          `Resume: applied ${report.fixesApplied.length} fix(es) to state.`,
          "info",
        );
      }
    } catch (e) {
      debugLog("resume-doctor-failed", {
        error: e instanceof Error ? e.message : String(e),
      });
    }
    invalidateAllCaches();
    if (resumeSessionFile) {
      const activityDir = join(sfRoot(s.basePath), "activity");
      const recovery = synthesizeCrashRecovery(
        s.basePath,
        s.currentUnit?.type ?? s.pausedUnitType ?? "unknown",
        s.currentUnit?.id ?? s.pausedUnitId ?? "unknown",
        resumeSessionFile ?? undefined,
        activityDir,
      );
      if (recovery && recovery.trace.toolCallCount > 0) {
        s.pendingCrashRecovery = recovery.prompt;
        ctx.ui.notify(
          `Recovered ${recovery.trace.toolCallCount} tool calls from paused session. Resuming with context.`,
          "info",
        );
      }
    }
    updateSessionLock(
      lockBase(),
      "resuming",
      s.currentMilestoneId ?? "unknown",
    );
    writeLock(lockBase(), "resuming", s.currentMilestoneId ?? "unknown");
    logCmuxEvent(
      loadEffectiveSFPreferences()?.preferences,
      s.stepMode ? "Assisted mode resumed." : "Autonomous mode resumed.",
      "progress",
    );
    captureProjectRootEnv(s.originalBasePath || s.basePath);
    await runAutoLoopWithUok({
      ctx,
      pi,
      s,
      deps: buildLoopDeps(),
      runKernelLoop: runUokKernelLoop,
    });
    cleanupAfterLoopExit(ctx);
    return;
  }
  // ── Fresh start path — delegated to auto-start.ts ──
  const bootstrapDeps = {
    shouldUseWorktreeIsolation,
    registerSigtermHandler,
    lockBase,
    buildResolver,
  };
  const ready = await bootstrapAutoSession(
    s,
    ctx,
    pi,
    base,
    verboseMode,
    requestedStepMode,
    bootstrapDeps,
    freshStartAssessment,
  );
  if (!ready) return;
  captureProjectRootEnv(s.originalBasePath || s.basePath);
  try {
    syncCmuxSidebar(
      loadEffectiveSFPreferences()?.preferences,
      await deriveState(s.basePath),
    );
  } catch (err) {
    // Best-effort only — sidebar sync must never block autonomous mode startup
    logWarning(
      "engine",
      `cmux sync failed: ${err instanceof Error ? err.message : String(err)}`,
      { file: "auto.ts" },
    );
  }
  logCmuxEvent(
    loadEffectiveSFPreferences()?.preferences,
    requestedStepMode ? "Assisted mode started." : "Autonomous mode started.",
    "progress",
  );
  // Dispatch the first unit
  await runAutoLoopWithUok({
    ctx,
    pi,
    s,
    deps: buildLoopDeps(),
    runKernelLoop: runUokKernelLoop,
  });
  cleanupAfterLoopExit(ctx);
}
|
|
// ─── Agent End Handler ────────────────────────────────────────────────────────
|
|
/**
 * Legacy agent_end entry point, retained only so existing importers keep working.
 *
 * Real agent_end processing lives in resolveAgentEnd() (auto-loop.ts), which
 * index.ts calls directly; the autoLoop() while loop performs the post-unit
 * work (verification, hooks, dispatch) that used to happen here.
 *
 * Any straggler caller that still reaches this function only settles the
 * pending agent-end promise so the loop is never left waiting:
 *  - session inactive or no command context → resolve as cancelled;
 *  - otherwise → clear the unit timeout and resolve with an empty message list.
 *
 * @param _ctx - Unused; kept for signature compatibility.
 * @param _pi - Unused; kept for signature compatibility.
 * @returns {Promise<void>}
 */
export async function handleAgentEnd(_ctx, _pi) {
  const loopIsLive = s.active && s.cmdCtx;
  if (loopIsLive) {
    clearUnitTimeout();
    resolveAgentEnd({ messages: [] });
    return;
  }
  // Not running (or no command context): still unblock anything awaiting agent_end.
  resolveAgentEndCancelled();
}
|
|
// describeNextUnit is imported from auto-dashboard.ts and re-exported
|
|
export { describeNextUnit } from "./auto-dashboard.js";
|
|
|
|
/**
 * Forward a progress update to the auto-dashboard implementation.
 *
 * Adds two things the dashboard function needs beyond the caller's arguments:
 * the module-level state accessors, and a one-letter badge derived from the
 * current unit's routing tier ("L", "S" or "H"; undefined when there is no
 * tier or the tier is not one of light/standard/heavy).
 */
function updateProgressWidget(ctx, unitType, unitId, state) {
  const routingTier = s.currentUnitRouting?.tier;
  let badge;
  if (routingTier) {
    const tierLetters = { light: "L", standard: "S", heavy: "H" };
    badge = tierLetters[routingTier];
  }
  _updateProgressWidget(
    ctx,
    unitType,
    unitId,
    state,
    widgetStateAccessors,
    badge,
  );
}
|
|
/**
 * Read-only view of the module-level session state handed to the dashboard
 * widget. Each accessor reads `s` at call time, so the widget always sees the
 * current values rather than a snapshot.
 */
const widgetStateAccessors = {
  getAutoStartTime() {
    return s.autoStartTime;
  },
  isStepMode() {
    return s.stepMode;
  },
  getCmdCtx() {
    return s.cmdCtx;
  },
  getBasePath() {
    return s.basePath;
  },
  isVerbose() {
    return s.verbose;
  },
  // Passed through directly; not derived from `s` in this block.
  isSessionSwitching: isSessionSwitchInFlight,
  getCurrentDispatchedModelId() {
    return s.currentDispatchedModelId;
  },
};
|
|
// ─── Preconditions ────────────────────────────────────────────────────────────
|
|
/**
 * Create the on-disk directories a unit needs before it is dispatched, so the
 * LLM never has to mkdir anything itself.
 *
 * Steps, in order:
 *  1. If the milestone directory cannot be resolved, create
 *     `<milestonesDir>/<milestone>/slices`.
 *  2. If the unit id carries a slice and the milestone now resolves, make sure
 *     the slice directory exists with a `tasks` subdirectory.
 *
 * @param _unitType - Unused.
 * @param unitId - Unit identifier, parsed with parseUnitId() for its milestone and slice parts.
 * @param base - Project base path used to resolve milestone directories.
 * @param _state - Unused.
 */
function ensurePreconditions(_unitType, unitId, base, _state) {
  const parsedId = parseUnitId(unitId);
  const milestoneId = parsedId.milestone;
  const sliceId = parsedId.slice;

  if (!resolveMilestonePath(base, milestoneId)) {
    const freshMilestoneDir = join(milestonesDir(base), milestoneId);
    mkdirSync(join(freshMilestoneDir, "slices"), { recursive: true });
  }

  if (sliceId === undefined) return;

  // Resolve again: the milestone directory may have just been created above.
  const milestoneDir = resolveMilestonePath(base, milestoneId);
  if (!milestoneDir) return;

  const slicesRoot = join(milestoneDir, "slices");
  if (!resolveDir(slicesRoot, sliceId)) {
    mkdirSync(join(slicesRoot, sliceId, "tasks"), { recursive: true });
  }

  // Use whatever name the slice directory resolves to, falling back to the raw slice id.
  const sliceDirName = resolveDir(slicesRoot, sliceId) ?? sliceId;
  const tasksPath = join(slicesRoot, sliceDirName, "tasks");
  if (!existsSync(tasksPath)) {
    mkdirSync(tasksPath, { recursive: true });
  }
}
|
|
/**
 * Run a post-unit hook as its own unit in a fresh session.
 *
 * If auto mode is not active, the session state is first initialised in
 * assisted (step) mode rooted at `targetBasePath`. A new session is then
 * created, the optional hook model is applied, the lock file is written, a
 * hard timeout is armed, and finally the hook prompt is sent with
 * `triggerTurn: true`.
 *
 * @param ctx - Extension context (UI, model registry, session manager).
 * @param pi - Extension API used to set the model and send the prompt.
 * @param {string} hookName - Hook name; the unit type becomes `hook/<hookName>`.
 * @param {string} triggerUnitType - Type of the unit that triggered the hook.
 * @param {string} triggerUnitId - Id of the triggering unit; reused as the hook unit id.
 * @param {string} hookPrompt - Prompt text sent to the fresh session.
 * @param {string} [hookModel] - Optional model id; falls back to the current session model when it cannot be resolved.
 * @param {string} targetBasePath - Base path used only when auto mode has to be activated here.
 * @returns {Promise<boolean>} `true` once the prompt has been sent; `false` if
 *   creating the new session was cancelled (auto mode is stopped in that case).
 */
export async function dispatchHookUnit(
  ctx,
  pi,
  hookName,
  triggerUnitType,
  triggerUnitId,
  hookPrompt,
  hookModel,
  targetBasePath,
) {
  if (!s.active) {
    s.active = true;
    s.stepMode = true;
    s.runControl = "assisted";
    s.cmdCtx = ctx;
    s.basePath = targetBasePath;
    s.autoStartTime = Date.now();
    s.currentUnit = null;
    s.pendingQuickTasks = [];
  }

  const hookUnitType = `hook/${hookName}`;
  const hookStartedAt = Date.now();

  // While the new session is being created the current unit is still recorded
  // under the triggering unit's type; it is switched to the hook type only
  // after newSession() succeeds.
  s.currentUnit = {
    type: triggerUnitType,
    id: triggerUnitId,
    startedAt: hookStartedAt,
  };

  const result = await s.cmdCtx.newSession();
  if (result.cancelled) {
    await stopAuto(ctx, pi);
    return false;
  }

  s.currentUnit = {
    type: hookUnitType,
    id: triggerUnitId,
    startedAt: hookStartedAt,
  };

  if (hookModel) {
    const availableModels = ctx.modelRegistry.getAvailable();
    const match = resolveModelId(
      hookModel,
      availableModels,
      ctx.model?.provider,
    );
    if (match) {
      try {
        await pi.setModel(match);
      } catch (err) {
        // Non-fatal: the hook still runs on the current session model.
        logWarning(
          "dispatch",
          `hook model set failed: ${err instanceof Error ? err.message : String(err)}`,
          { file: "auto.ts" },
        );
      }
    } else {
      ctx.ui.notify(
        `Hook model "${hookModel}" not found in available models. Falling back to current session model. ` +
          `Ensure the model is defined in models.json and has auth configured.`,
        "warning",
      );
    }
  }

  const sessionFile = normalizeSessionFilePath(
    ctx.sessionManager.getSessionFile(),
  );
  writeLock(lockBase(), hookUnitType, triggerUnitId, sessionFile ?? undefined);

  // Replace any timeout left over from a previous unit with the hook's own hard timeout.
  clearUnitTimeout();
  const supervisor = resolveAutoSupervisorConfig();
  const hardTimeoutMinutes = supervisor.hard_timeout_minutes ?? 30;
  const hookHardTimeoutMs = hardTimeoutMinutes * 60 * 1000;
  s.unitTimeoutHandle = setTimeout(async () => {
    s.unitTimeoutHandle = null;
    if (!s.active) return;
    // Nothing awaits a timer callback, so a throw/rejection here would surface
    // as an unhandled rejection (fatal by default in current Node). Log instead.
    try {
      ctx.ui.notify(
        `Hook ${hookName} exceeded ${hardTimeoutMinutes}min timeout. Pausing autonomous mode.`,
        "warning",
      );
      resetHookState();
      await pauseAuto(ctx, pi);
    } catch (err) {
      logWarning(
        "engine",
        `hook timeout handling failed: ${err instanceof Error ? err.message : String(err)}`,
        { file: "auto.ts" },
      );
    }
  }, hookHardTimeoutMs);

  ctx.ui.setStatus("sf-auto", s.stepMode ? "next" : "auto");
  ctx.ui.notify(`Running post-unit hook: ${hookName}`, "info");

  // Ensure cwd matches basePath before hook dispatch (#1389)
  try {
    if (process.cwd() !== s.basePath) process.chdir(s.basePath);
  } catch (err) {
    logWarning(
      "engine",
      `chdir failed before hook dispatch: ${err instanceof Error ? err.message : String(err)}`,
      { file: "auto.ts" },
    );
  }

  debugLog("dispatchHookUnit", {
    phase: "send-message",
    promptLength: hookPrompt.length,
  });
  pi.sendMessage(
    { customType: "sf-auto", content: hookPrompt, display: true },
    { triggerTurn: true },
  );
  return true;
}
|
|
export { resolveExpectedArtifactPath } from "./auto-artifact-paths.js";
|
|
// Re-export recovery functions for external consumers
|
|
export { buildLoopRemediationSteps } from "./auto-recovery.js";
|