fix: auto-fallback to ready provider instead of stopping autonomous mode

When the selected model's provider is not request-ready:
1. Pre-flight check before runUnit: find any ready provider, switch to it
   and continue. Only stop if no ready provider exists.
2. Post-runUnit cancelled handler: same logic — reselect + return 'continue'
   instead of silently breaking.
3. Both paths now emit a visible ctx.ui.notify so the user can see what
   happened ('provider X not ready — retrying with Y/model').

Previously, the unit was cancelled instantly, all 4 repair attempts were
also cancelled, and autonomous mode paused with a misleading
solver-missing-checkpoint and no user notification.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-10 02:33:23 +02:00
parent 7c970088f1
commit b464f2a78e

View file

@ -2238,6 +2238,47 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
// Write preliminary lock (no session path yet — runUnit creates a new session).
// Crash recovery can still identify the in-flight unit from this lock.
deps.writeLock(deps.lockBase(), unitType, unitId);
// Pre-flight provider readiness check: if the resolved model's provider is
// not request-ready (expired token, logged out), attempt to reselect a ready
// provider before dispatching. This prevents the unit from burning a runUnit
// call only to be immediately cancelled with no-transcript.
{
const selectedProvider = s.currentUnitModel?.provider ?? ctx.model?.provider;
if (
selectedProvider != null &&
typeof ctx.modelRegistry?.isProviderRequestReady === "function"
) {
let ready = false;
try {
ready = ctx.modelRegistry.isProviderRequestReady(selectedProvider);
} catch {
ready = false;
}
if (!ready) {
const allModels = ctx.modelRegistry.getAvailable?.() ?? [];
const fallback = allModels.find(
(m) =>
m.provider !== selectedProvider &&
ctx.modelRegistry.isProviderRequestReady(m.provider),
);
if (fallback) {
const ok = await pi.setModel(fallback, { persist: false });
if (ok) {
ctx.ui.notify(
`Autonomous mode: provider ${selectedProvider} not ready — switched to ${fallback.provider}/${fallback.id}`,
"warning",
);
s.currentUnitModel = fallback;
}
} else {
const msg = `Autonomous mode stopped: provider ${selectedProvider} is not request-ready and no fallback provider is available. Check your login/API key.`;
ctx.ui.notify(msg, "error");
await deps.stopAuto(ctx, pi, msg);
return { action: "break", reason: "provider-pause" };
}
}
}
}
debugLog("autoLoop", {
phase: "runUnit-start",
iteration: ic.iteration,
@ -2483,8 +2524,8 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
unitId,
currentUnitResult.errorContext?.message ?? "cancelled",
);
// Provider-error pause: pauseAuto already handled cleanup and scheduled
// recovery. Don't hard-stop — just break out of the loop (#2762).
// Provider-error: try to reselect a ready provider and continue rather
// than stopping autonomous mode. Only stop if no ready provider exists.
if (currentUnitResult.errorContext?.category === "provider") {
await emitCancelledUnitEnd(
ic,
@ -2493,6 +2534,27 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
unitStartSeq,
currentUnitResult.errorContext,
);
const failedProvider =
s.currentUnitModel?.provider ?? ctx.model?.provider;
const allModels = ctx.modelRegistry?.getAvailable?.() ?? [];
const fallback = allModels.find(
(m) =>
m.provider !== failedProvider &&
ctx.modelRegistry?.isProviderRequestReady?.(m.provider),
);
if (fallback) {
const ok = await pi.setModel(fallback, { persist: false });
if (ok) {
s.currentUnitModel = fallback;
ctx.ui.notify(
`Autonomous mode: provider ${failedProvider} not ready — retrying with ${fallback.provider}/${fallback.id}`,
"warning",
);
return { action: "continue" };
}
}
const msg = `Autonomous mode stopped: ${currentUnitResult.errorContext.message ?? `provider ${failedProvider} not ready`}. Check your login/API key.`;
ctx.ui.notify(msg, "error");
debugLog("autoLoop", {
phase: "exit",
reason: "provider-pause",