feat: add cross-provider fallback when rate/quota limits are hit (#125)
When all credentials for a provider are exhausted, the system now automatically falls back to the next available provider in a user-configured fallback chain. Higher-priority providers are restored automatically when their backoff expires. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7f0caffd65
commit
adca6901ec
8 changed files with 995 additions and 15 deletions
380
.plans/issue-125-provider-fallback.md
Normal file
380
.plans/issue-125-provider-fallback.md
Normal file
|
|
@ -0,0 +1,380 @@
|
|||
# Issue #125: Provider Fallback When Multiple Providers Configured
|
||||
# Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||
|
||||
## Overview
|
||||
|
||||
Add cross-provider fallback so that when a provider hits rate/quota limits, the system
|
||||
automatically switches to another provider that serves an equivalent model (or a
|
||||
user-configured fallback chain of different models).
|
||||
|
||||
## Current State
|
||||
|
||||
The codebase already supports:
|
||||
- **Multi-credential per provider** — round-robin or session-sticky selection
|
||||
- **Per-credential backoff tracking** — rate_limit (30s), quota_exhausted (30min), server_error (20s)
|
||||
- **Credential rotation on error** — `markUsageLimitReached()` backs off one key and returns
|
||||
whether another key exists for the same provider
|
||||
- **Retry with exponential backoff** — 3 retries, 2s/4s/8s delays
|
||||
- **Error classification** — quota_exhausted, rate_limit, server_error, unknown
|
||||
|
||||
The gap: fallback only works within a single provider (multiple API keys). There is no
|
||||
mechanism to fall back to a *different provider* serving the same or equivalent model.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### Phase 1: Fallback Chain Configuration & Storage
|
||||
|
||||
**Goal:** Let users define ordered fallback chains that map a primary model to backup
|
||||
model+provider combos.
|
||||
|
||||
#### 1.1 — Settings Schema (`settings-manager.ts`)
|
||||
|
||||
Add a new top-level setting:
|
||||
|
||||
```typescript
|
||||
interface FallbackChainEntry {
|
||||
provider: string; // e.g. "zai", "alibaba", "openai"
|
||||
model: string; // e.g. "glm-5", "claude-opus-4-6"
|
||||
priority: number; // lower = higher priority (1 = primary)
|
||||
}
|
||||
|
||||
interface FallbackSettings {
|
||||
enabled: boolean; // default: false
|
||||
chains: Record<string, FallbackChainEntry[]>; // keyed by chain name
|
||||
// Example:
|
||||
// "coding": [
|
||||
// { provider: "zai", model: "glm-5", priority: 1 },
|
||||
// { provider: "alibaba", model: "glm-5", priority: 2 },
|
||||
// { provider: "openai", model: "gpt-4.1", priority: 3 }
|
||||
// ]
|
||||
}
|
||||
```
|
||||
|
||||
**Files to modify:**
|
||||
- `packages/pi-coding-agent/src/core/settings-manager.ts` — add `getFallbackSettings()`,
|
||||
`setFallbackChain()`, `removeFallbackChain()`, getter/setter for `fallback.enabled`
|
||||
|
||||
#### 1.2 — Settings File Location
|
||||
|
||||
Stored in the existing `~/.pi/agent/settings.json` under a new `fallback` key.
|
||||
|
||||
#### 1.3 — CLI Configuration Commands
|
||||
|
||||
Add subcommands to the existing settings CLI:
|
||||
- `pi settings fallback enable/disable`
|
||||
- `pi settings fallback add-chain <name> --provider <p> --model <m> --priority <n>`
|
||||
- `pi settings fallback remove-chain <name>`
|
||||
- `pi settings fallback list`
|
||||
|
||||
**Files to modify:**
|
||||
- `packages/pi-coding-agent/src/cli/commands/settings.ts` (or equivalent CLI entry point)
|
||||
|
||||
---
|
||||
|
||||
### Phase 2: Provider-Level Backoff Tracking
|
||||
|
||||
**Goal:** Track backoff state at the provider level (not just credential level) so the
|
||||
fallback system knows when an entire provider is unavailable.
|
||||
|
||||
#### 2.1 — Extend AuthStorage (`auth-storage.ts`)
|
||||
|
||||
Add a provider-level backoff map alongside the existing credential-level one:
|
||||
|
||||
```typescript
|
||||
private providerBackoff: Map<string, number> = new Map();
|
||||
// Map<provider, backoffExpiresAt>
|
||||
```
|
||||
|
||||
**New methods:**
|
||||
```typescript
|
||||
markProviderExhausted(provider: string, errorType: UsageLimitErrorType): void
|
||||
isProviderAvailable(provider: string): boolean
|
||||
getProviderBackoffRemaining(provider: string): number // ms until available, 0 if available
|
||||
```
|
||||
|
||||
**Logic:** When `markUsageLimitReached()` returns `false` (all credentials for a provider
|
||||
are backed off), also mark the provider itself as backed off with the longest remaining
|
||||
credential backoff duration.
|
||||
|
||||
**Files to modify:**
|
||||
- `packages/pi-coding-agent/src/core/auth-storage.ts`
|
||||
|
||||
---
|
||||
|
||||
### Phase 3: Fallback Resolution Engine
|
||||
|
||||
**Goal:** Given a current model+provider that just failed, find the next available
|
||||
fallback from the configured chain.
|
||||
|
||||
#### 3.1 — FallbackResolver (`fallback-resolver.ts` — new file)
|
||||
|
||||
```typescript
|
||||
// packages/pi-coding-agent/src/core/fallback-resolver.ts
|
||||
|
||||
export interface FallbackResult {
|
||||
model: Model<Api>;
|
||||
reason: string; // "quota_exhausted on zai, falling back to alibaba"
|
||||
}
|
||||
|
||||
export class FallbackResolver {
|
||||
constructor(
|
||||
private settings: SettingsManager,
|
||||
private authStorage: AuthStorage,
|
||||
private modelRegistry: ModelRegistry,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Find the next available fallback for the current model.
|
||||
* Returns null if no fallback is configured or available.
|
||||
*/
|
||||
async findFallback(
|
||||
currentModel: Model<Api>,
|
||||
errorType: UsageLimitErrorType,
|
||||
): Promise<FallbackResult | null> {
|
||||
// 1. Check if fallback is enabled
|
||||
// 2. Find chain(s) containing currentModel's provider+model
|
||||
// 3. Sort by priority
|
||||
// 4. Skip entries where provider is backed off
|
||||
// 5. Skip entries without valid API keys
|
||||
// 6. Return first available, or null
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the chain a model belongs to.
|
||||
*/
|
||||
findChainForModel(provider: string, modelId: string): FallbackChainEntry[] | null
|
||||
|
||||
/**
|
||||
* Get the highest-priority available model from a chain.
|
||||
* Used on session start to pick the best available model.
|
||||
*/
|
||||
async getBestAvailable(chainName: string): Promise<FallbackResult | null>
|
||||
}
|
||||
```
|
||||
|
||||
#### 3.2 — Model Equivalence
|
||||
|
||||
For same-model cross-provider fallback (the first iteration of this feature), the chain entries
|
||||
explicitly name the provider+model pairs. No automatic equivalence detection needed —
|
||||
the user defines what's equivalent.
|
||||
|
||||
---
|
||||
|
||||
### Phase 4: Integrate Fallback into Retry Flow
|
||||
|
||||
**Goal:** When credential rotation fails (all keys for a provider exhausted), try the
|
||||
fallback chain before giving up or doing exponential backoff.
|
||||
|
||||
#### 4.1 — Modify `_handleRetryableError()` (`agent-session.ts`)
|
||||
|
||||
Current flow:
|
||||
```
|
||||
1. Classify error
|
||||
2. Try credential rotation within provider → if success, retry immediately
|
||||
3. If quota_exhausted and all backed off → give up
|
||||
4. Exponential backoff retry
|
||||
```
|
||||
|
||||
New flow:
|
||||
```
|
||||
1. Classify error
|
||||
2. Try credential rotation within provider → if success, retry immediately
|
||||
3. ** Try provider fallback via FallbackResolver **
|
||||
a. If fallback found → swap model on agent, retry immediately
|
||||
b. Emit event: "fallback_provider_switch" with old/new provider info
|
||||
4. If quota_exhausted and no fallback → give up
|
||||
5. Exponential backoff retry
|
||||
```
|
||||
|
||||
**Key changes in agent-session.ts (~lines 2317-2370):**
|
||||
|
||||
```typescript
|
||||
// After credential rotation fails:
|
||||
if (!hasAlternate) {
|
||||
const fallbackResult = await this.fallbackResolver?.findFallback(
|
||||
this.agent.model,
|
||||
errorType,
|
||||
);
|
||||
|
||||
if (fallbackResult) {
|
||||
// Swap to fallback model
|
||||
this.agent.setModel(fallbackResult.model);
|
||||
this._removeLastError();
|
||||
this._emitEvent("auto_retry_start", {
|
||||
attempt: this._retryAttempt + 1,
|
||||
delayMs: 0,
|
||||
reason: fallbackResult.reason,
|
||||
});
|
||||
await this.agent.continue();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### 4.2 — Agent Model Swapping
|
||||
|
||||
The agent needs a method to swap its model mid-conversation:
|
||||
|
||||
```typescript
|
||||
// agent.ts or agent-loop.ts
|
||||
setModel(model: Model<Api>): void {
|
||||
this.config.model = model;
|
||||
// Re-resolve API key for new provider
|
||||
}
|
||||
```
|
||||
|
||||
**Important:** The API key must also be re-resolved since we're switching providers.
|
||||
The `getApiKey` callback in `AgentOptions` already takes a provider string, so this
|
||||
should work naturally.
|
||||
|
||||
**Files to modify:**
|
||||
- `packages/pi-coding-agent/src/core/agent-session.ts`
|
||||
- `packages/pi-ai/src/agent.ts` or `packages/pi-ai/src/agent-loop.ts`
|
||||
|
||||
---
|
||||
|
||||
### Phase 5: Provider Restoration (Auto-Upgrade)
|
||||
|
||||
**Goal:** When a higher-priority provider's backoff expires, switch back to it.
|
||||
|
||||
#### 5.1 — Pre-Request Priority Check
|
||||
|
||||
Before each LLM request, check if a higher-priority provider in the chain has become
|
||||
available again:
|
||||
|
||||
```typescript
|
||||
// In agent-loop.ts streamAssistantResponse(), before calling streamFn:
|
||||
if (this.fallbackResolver) {
|
||||
const bestAvailable = await this.fallbackResolver.getBestAvailable(currentChain);
|
||||
if (bestAvailable && bestAvailable.model.provider !== currentModel.provider) {
|
||||
// Upgrade back to higher-priority provider
|
||||
this.setModel(bestAvailable.model);
|
||||
this._emitEvent("fallback_provider_restored", { ... });
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### 5.2 — Quota Reset Awareness (Future Enhancement)
|
||||
|
||||
For now, rely on backoff expiry times. A future enhancement could:
|
||||
- Parse rate limit headers for reset timestamps
|
||||
- Store per-provider quota windows (5-hour, daily, weekly, monthly)
|
||||
- Predict when quota will restore based on usage patterns
|
||||
|
||||
This is complex and should be a separate issue.
|
||||
|
||||
---
|
||||
|
||||
### Phase 6: User-Facing Events & UI
|
||||
|
||||
**Goal:** Surface fallback activity to the user so they know what's happening.
|
||||
|
||||
#### 6.1 — New Events
|
||||
|
||||
```typescript
|
||||
type FallbackEvent =
|
||||
| { type: "fallback_provider_switch"; from: string; to: string; reason: string }
|
||||
| { type: "fallback_provider_restored"; provider: string; reason: string }
|
||||
| { type: "fallback_chain_exhausted"; chain: string; reason: string }
|
||||
```
|
||||
|
||||
#### 6.2 — TUI Integration
|
||||
|
||||
Display a brief notification in the TUI when fallback occurs:
|
||||
- `⚡ Switched from zai/glm-5 → alibaba/glm-5 (rate limit)`
|
||||
- `✓ Restored to zai/glm-5 (quota available)`
|
||||
- `⚠ All providers in chain "coding" exhausted`
|
||||
|
||||
**Files to modify:**
|
||||
- `packages/pi-tui/src/` — event handler for new fallback events
|
||||
- Status bar or notification area in the TUI
|
||||
|
||||
---
|
||||
|
||||
## Implementation Order
|
||||
|
||||
| Step | Phase | Effort | Dependencies |
|
||||
|------|-------|--------|-------------|
|
||||
| 1 | Phase 1.1-1.2: Settings schema | Small | None |
|
||||
| 2 | Phase 2: Provider-level backoff | Small | None |
|
||||
| 3 | Phase 3: FallbackResolver | Medium | Steps 1, 2 |
|
||||
| 4 | Phase 4: Retry integration | Medium | Step 3 |
|
||||
| 5 | Phase 5.1: Auto-restoration | Small | Step 4 |
|
||||
| 6 | Phase 1.3: CLI commands | Small | Step 1 |
|
||||
| 7 | Phase 6: Events & UI | Small | Step 4 |
|
||||
|
||||
Steps 1 and 2 can be done in parallel. Steps 6 and 7 can be done in parallel.
|
||||
|
||||
---
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
### 1. Explicit chains vs automatic model equivalence
|
||||
**Decision:** Explicit user-configured chains.
|
||||
**Why:** Automatic equivalence is unreliable — models with the same name from different
|
||||
providers may have different capabilities, limits, or pricing. Users should explicitly
|
||||
opt in to which models they consider interchangeable.
|
||||
|
||||
### 2. Where fallback sits in the retry flow
|
||||
**Decision:** After credential rotation, before exponential backoff.
|
||||
**Why:** Provider fallback is a better recovery than waiting and retrying the same
|
||||
exhausted provider. If the fallback also fails, exponential backoff still kicks in.
|
||||
|
||||
### 3. Model swap vs new agent
|
||||
**Decision:** Swap model on existing agent mid-conversation.
|
||||
**Why:** Creating a new agent would lose conversation context. The agent's `streamFn`
|
||||
already accepts model as a parameter, and `getApiKey` resolves per-provider, so
|
||||
swapping is straightforward.
|
||||
|
||||
### 4. Restoration strategy
|
||||
**Decision:** Check before each request (lazy check on backoff expiry).
|
||||
**Why:** No background timers needed. The cost of one `isProviderAvailable()` check
|
||||
per request is negligible. More sophisticated quota tracking can be added later.
|
||||
|
||||
### 5. Scope of fallback
|
||||
**Decision:** One global set of chains applied to every session, not per-agent-type (initially).
|
||||
**Why:** The issue mentions per-agent-type toggle, but the simpler initial implementation
|
||||
is a global fallback chain that applies to any session using a model in the chain.
|
||||
Per-agent-type scoping can be added by extending the chain config with an `agentTypes`
|
||||
filter.
|
||||
|
||||
---
|
||||
|
||||
## Risks & Mitigations
|
||||
|
||||
| Risk | Impact | Mitigation |
|
||||
|------|--------|-----------|
|
||||
| Model swap mid-conversation changes behavior | Medium | Log the swap, let user disable fallback |
|
||||
| Different providers have different tool/feature support | High | Validate fallback model supports same API features before swapping |
|
||||
| Credential resolution race conditions | Low | Use existing file-lock mechanism in auth-storage |
|
||||
| Chain misconfiguration (nonexistent model) | Low | Validate chain entries on save, warn on invalid |
|
||||
| Backoff timing mismatch with actual quota reset | Medium | Conservative backoff defaults; Phase 5.2 for future improvement |
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
1. **Unit tests for FallbackResolver** — mock auth-storage and model-registry, test chain
|
||||
resolution, priority ordering, backoff skipping
|
||||
2. **Unit tests for extended auth-storage** — provider-level backoff tracking
|
||||
3. **Integration test for retry flow** — simulate rate limit → credential fallback →
|
||||
provider fallback → restoration
|
||||
4. **E2E test** — configure a chain, hit rate limit on provider A, verify automatic
|
||||
switch to provider B
|
||||
5. **Settings tests** — validate chain CRUD operations, persistence, invalid input handling
|
||||
|
||||
---
|
||||
|
||||
## Files Summary
|
||||
|
||||
| File | Action | Changes |
|
||||
|------|--------|---------|
|
||||
| `packages/pi-coding-agent/src/core/settings-manager.ts` | Modify | Add FallbackSettings types, getters/setters |
|
||||
| `packages/pi-coding-agent/src/core/auth-storage.ts` | Modify | Add provider-level backoff tracking |
|
||||
| `packages/pi-coding-agent/src/core/fallback-resolver.ts` | **New** | FallbackResolver class |
|
||||
| `packages/pi-coding-agent/src/core/agent-session.ts` | Modify | Integrate fallback into retry flow |
|
||||
| `packages/pi-ai/src/agent.ts` | Modify | Add `setModel()` method |
|
||||
| `packages/pi-coding-agent/src/cli/commands/settings.ts` | Modify | Add fallback CLI subcommands |
|
||||
| `packages/pi-tui/src/` | Modify | Fallback event display |
|
||||
|
|
@ -70,6 +70,7 @@ import {
|
|||
wrapToolsWithExtensions,
|
||||
} from "./extensions/index.js";
|
||||
import type { BashExecutionMessage, CustomMessage } from "./messages.js";
|
||||
import { FallbackResolver } from "./fallback-resolver.js";
|
||||
import type { ModelRegistry } from "./model-registry.js";
|
||||
import { expandPromptTemplate, type PromptTemplate } from "./prompt-templates.js";
|
||||
import type { ResourceExtensionPaths, ResourceLoader } from "./resource-loader.js";
|
||||
|
|
@ -120,7 +121,10 @@ export type AgentSessionEvent =
|
|||
errorMessage?: string;
|
||||
}
|
||||
| { type: "auto_retry_start"; attempt: number; maxAttempts: number; delayMs: number; errorMessage: string }
|
||||
| { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string };
|
||||
| { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string }
|
||||
| { type: "fallback_provider_switch"; from: string; to: string; reason: string }
|
||||
| { type: "fallback_provider_restored"; provider: string; reason: string }
|
||||
| { type: "fallback_chain_exhausted"; reason: string };
|
||||
|
||||
/** Listener function for agent session events */
|
||||
export type AgentSessionEventListener = (event: AgentSessionEvent) => void;
|
||||
|
|
@ -267,6 +271,9 @@ export class AgentSession {
|
|||
// Model registry for API key resolution
|
||||
private _modelRegistry: ModelRegistry;
|
||||
|
||||
// Provider fallback resolver
|
||||
private _fallbackResolver: FallbackResolver;
|
||||
|
||||
// Tool registry for extension getTools/setTools
|
||||
private _toolRegistry: Map<string, AgentTool> = new Map();
|
||||
private _toolPromptSnippets: Map<string, string> = new Map();
|
||||
|
|
@ -284,6 +291,11 @@ export class AgentSession {
|
|||
this._customTools = config.customTools ?? [];
|
||||
this._cwd = config.cwd;
|
||||
this._modelRegistry = config.modelRegistry;
|
||||
this._fallbackResolver = new FallbackResolver(
|
||||
this.settingsManager,
|
||||
this._modelRegistry.authStorage,
|
||||
this._modelRegistry,
|
||||
);
|
||||
this._extensionRunnerRef = config.extensionRunnerRef;
|
||||
this._initialActiveToolNames = config.initialActiveToolNames;
|
||||
this._baseToolsOverride = config.baseToolsOverride;
|
||||
|
|
@ -303,6 +315,11 @@ export class AgentSession {
|
|||
return this._modelRegistry;
|
||||
}
|
||||
|
||||
/** Fallback resolver for cross-provider fallback */
|
||||
get fallbackResolver(): FallbackResolver {
	// Read-only access to the resolver this session uses for cross-provider fallback.
	const resolver = this._fallbackResolver;
	return resolver;
}
|
||||
|
||||
// =========================================================================
|
||||
// Event Subscription
|
||||
// =========================================================================
|
||||
|
|
@ -868,6 +885,19 @@ export class AgentSession {
|
|||
);
|
||||
}
|
||||
|
||||
// Check if a higher-priority provider in the fallback chain has recovered
|
||||
const restoration = await this._fallbackResolver.checkForRestoration(this.model);
|
||||
if (restoration) {
|
||||
const previousProvider = `${this.model.provider}/${this.model.id}`;
|
||||
this.agent.setModel(restoration.model);
|
||||
this.sessionManager.appendModelChange(restoration.model.provider, restoration.model.id);
|
||||
this._emit({
|
||||
type: "fallback_provider_restored",
|
||||
provider: `${restoration.model.provider}/${restoration.model.id}`,
|
||||
reason: `Restored from ${previousProvider}`,
|
||||
});
|
||||
}
|
||||
|
||||
// Validate API key
|
||||
const apiKey = await this._modelRegistry.getApiKey(this.model, this.sessionId);
|
||||
if (!apiKey) {
|
||||
|
|
@ -2354,20 +2384,66 @@ export class AgentSession {
|
|||
return true;
|
||||
}
|
||||
|
||||
// All credentials are backed off. For quota-exhausted errors the backoff is very
|
||||
// long (30+ min), so retrying immediately is futile and will only produce
|
||||
// confusing secondary errors (e.g. "Authentication failed"). Give up now and
|
||||
// surface the original quota error to the user.
|
||||
if (errorType === "quota_exhausted") {
|
||||
this._emit({
|
||||
type: "auto_retry_end",
|
||||
success: false,
|
||||
attempt: this._retryAttempt,
|
||||
finalError: message.errorMessage,
|
||||
});
|
||||
this._retryAttempt = 0;
|
||||
this._resolveRetry();
|
||||
return false;
|
||||
// All credentials are backed off. Try cross-provider fallback before giving up.
|
||||
if (isCredentialError) {
|
||||
const fallbackResult = await this._fallbackResolver.findFallback(
|
||||
this.model,
|
||||
errorType,
|
||||
);
|
||||
|
||||
if (fallbackResult) {
|
||||
// Swap to fallback model — don't persist to settings
|
||||
const previousProvider = this.model.provider;
|
||||
this.agent.setModel(fallbackResult.model);
|
||||
this.sessionManager.appendModelChange(fallbackResult.model.provider, fallbackResult.model.id);
|
||||
|
||||
// Remove error message from agent state
|
||||
const msgs = this.agent.state.messages;
|
||||
if (msgs.length > 0 && msgs[msgs.length - 1].role === "assistant") {
|
||||
this.agent.replaceMessages(msgs.slice(0, -1));
|
||||
}
|
||||
|
||||
this._emit({
|
||||
type: "fallback_provider_switch",
|
||||
from: `${previousProvider}/${this.model?.id}`,
|
||||
to: `${fallbackResult.model.provider}/${fallbackResult.model.id}`,
|
||||
reason: fallbackResult.reason,
|
||||
});
|
||||
|
||||
this._emit({
|
||||
type: "auto_retry_start",
|
||||
attempt: this._retryAttempt + 1,
|
||||
maxAttempts: settings.maxRetries,
|
||||
delayMs: 0,
|
||||
errorMessage: `${message.errorMessage} (${fallbackResult.reason})`,
|
||||
});
|
||||
|
||||
// Retry immediately with fallback provider - don't increment _retryAttempt
|
||||
setTimeout(() => {
|
||||
this.agent.continue().catch(() => {
|
||||
// Retry failed - will be caught by next agent_end
|
||||
});
|
||||
}, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// No fallback available either
|
||||
if (errorType === "quota_exhausted") {
|
||||
this._emit({
|
||||
type: "fallback_chain_exhausted",
|
||||
reason: `All providers exhausted for ${this.model.provider}/${this.model.id}`,
|
||||
});
|
||||
this._emit({
|
||||
type: "auto_retry_end",
|
||||
success: false,
|
||||
attempt: this._retryAttempt,
|
||||
finalError: message.errorMessage,
|
||||
});
|
||||
this._retryAttempt = 0;
|
||||
this._resolveRetry();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -248,6 +248,13 @@ export class AuthStorage {
|
|||
*/
|
||||
private credentialBackoff: Map<string, Map<number, number>> = new Map();
|
||||
|
||||
/**
|
||||
* Provider-level backoff tracking.
|
||||
* Set when all credentials for a provider are backed off.
|
||||
* Map<provider, backoffExpiresAt>
|
||||
*/
|
||||
private providerBackoff: Map<string, number> = new Map();
|
||||
|
||||
private constructor(private storage: AuthStorageBackend) {
|
||||
this.reload();
|
||||
}
|
||||
|
|
@ -398,6 +405,7 @@ export class AuthStorage {
|
|||
delete this.data[provider];
|
||||
this.providerRoundRobinIndex.delete(provider);
|
||||
this.credentialBackoff.delete(provider);
|
||||
this.providerBackoff.delete(provider);
|
||||
this.persistProviderChange(provider, undefined);
|
||||
}
|
||||
|
||||
|
|
@ -484,6 +492,43 @@ export class AuthStorage {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark an entire provider as exhausted.
|
||||
* Called when all credentials for a provider are backed off.
|
||||
*/
|
||||
markProviderExhausted(provider: string, errorType: UsageLimitErrorType): void {
	// Expiry this particular error would impose if it were the only one recorded.
	const proposedExpiry = Date.now() + getBackoffDuration(errorType);

	// Never shorten a backoff that is already in force. A brief mark (for example
	// a rate_limit) arriving while a much longer one (for example quota_exhausted)
	// is still running must not make the provider look available early, otherwise
	// restoration would switch back to a provider that is still exhausted.
	const currentExpiry = this.providerBackoff.get(provider) ?? 0;

	this.providerBackoff.set(provider, Math.max(currentExpiry, proposedExpiry));
}
|
||||
|
||||
/**
|
||||
* Check if a provider is currently available (not backed off at provider level).
|
||||
*/
|
||||
isProviderAvailable(provider: string): boolean {
	const backoffUntil = this.providerBackoff.get(provider);

	// No provider-level backoff recorded: nothing blocks this provider.
	if (backoffUntil === undefined) return true;

	// An entry whose deadline has passed is dropped from the map so that later
	// lookups take the fast path above.
	const expired = Date.now() >= backoffUntil;
	if (expired) {
		this.providerBackoff.delete(provider);
	}
	return expired;
}
|
||||
|
||||
/**
|
||||
* Get milliseconds remaining until provider backoff expires.
|
||||
* Returns 0 if provider is available.
|
||||
*/
|
||||
getProviderBackoffRemaining(provider: string): number {
	const backoffUntil = this.providerBackoff.get(provider);

	// No provider-level backoff recorded: zero milliseconds to wait.
	if (backoffUntil === undefined) return 0;

	const msLeft = backoffUntil - Date.now();
	const expired = msLeft <= 0;

	// A lapsed entry is removed as a side effect of the query.
	if (expired) {
		this.providerBackoff.delete(provider);
	}
	return expired ? 0 : msLeft;
}
|
||||
|
||||
/**
|
||||
* Check if a credential index is currently backed off.
|
||||
*/
|
||||
|
|
|
|||
229
packages/pi-coding-agent/src/core/fallback-resolver.test.ts
Normal file
229
packages/pi-coding-agent/src/core/fallback-resolver.test.ts
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
// GSD Provider Fallback Resolver Tests
|
||||
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||
|
||||
import { describe, it, beforeEach, mock } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { FallbackResolver } from "./fallback-resolver.js";
|
||||
import type { Api, Model } from "@gsd/pi-ai";
|
||||
import type { AuthStorage } from "./auth-storage.js";
|
||||
import type { ModelRegistry } from "./model-registry.js";
|
||||
import type { FallbackChainEntry, SettingsManager } from "./settings-manager.js";
|
||||
|
||||
function createMockModel(provider: string, id: string): Model<Api> {
	// Build a minimal model stub for resolver tests. Only `provider` and `id`
	// vary between stubs; every other field is a fixed placeholder value.
	const baseUrl = `https://api.${provider}.com`;
	const cost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };

	return {
		id,
		name: id,
		api: "openai-completions" as Api,
		provider,
		baseUrl,
		reasoning: false,
		input: ["text"],
		cost,
		contextWindow: 128000,
		maxTokens: 16384,
	} as Model<Api>;
}
|
||||
|
||||
const zaiModel = createMockModel("zai", "glm-5");
|
||||
const alibabaModel = createMockModel("alibaba", "glm-5");
|
||||
const openaiModel = createMockModel("openai", "gpt-4.1");
|
||||
|
||||
const defaultChain: FallbackChainEntry[] = [
|
||||
{ provider: "zai", model: "glm-5", priority: 1 },
|
||||
{ provider: "alibaba", model: "glm-5", priority: 2 },
|
||||
{ provider: "openai", model: "gpt-4.1", priority: 3 },
|
||||
];
|
||||
|
||||
function createResolver(overrides?: {
	enabled?: boolean;
	isProviderAvailable?: (provider: string) => boolean;
	hasAuth?: (provider: string) => boolean;
	find?: (provider: string, modelId: string) => Model<Api> | undefined;
}) {
	// Build a FallbackResolver wired to lightweight stubs. Each override replaces
	// one stubbed behaviour; anything omitted falls back to the permissive
	// default (fallback enabled, every provider available and authenticated,
	// and the three fixture models resolvable).
	const alwaysTrue = () => true;

	// Default registry lookup: resolve against the three fixture models by
	// provider + id, yielding undefined for anything else.
	const fixtureModels = [zaiModel, alibabaModel, openaiModel];
	const lookupFixture = (provider: string, modelId: string) =>
		fixtureModels.find((candidate) => candidate.provider === provider && candidate.id === modelId);

	const settingsManager = {
		getFallbackSettings: () => {
			const enabled = overrides?.enabled ?? true;
			return { enabled, chains: { coding: defaultChain } };
		},
	} as unknown as SettingsManager;

	// markProviderExhausted is a node:test mock so tests can inspect its calls.
	const authStorage = {
		markProviderExhausted: mock.fn(),
		isProviderAvailable: overrides?.isProviderAvailable ?? alwaysTrue,
		hasAuth: overrides?.hasAuth ?? alwaysTrue,
	} as unknown as AuthStorage;

	const modelRegistry = {
		find: overrides?.find ?? lookupFixture,
	} as unknown as ModelRegistry;

	const resolver = new FallbackResolver(settingsManager, authStorage, modelRegistry);
	return { resolver, authStorage };
}
|
||||
|
||||
// ─── findFallback ────────────────────────────────────────────────────────────
|
||||
|
||||
describe("FallbackResolver — findFallback", () => {
|
||||
it("returns next available provider when current fails", async () => {
|
||||
const { resolver } = createResolver();
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.model.provider, "alibaba");
|
||||
assert.equal(result!.model.id, "glm-5");
|
||||
assert.equal(result!.chainName, "coding");
|
||||
});
|
||||
|
||||
it("marks current provider as exhausted", async () => {
|
||||
const { resolver, authStorage } = createResolver();
|
||||
await resolver.findFallback(zaiModel, "rate_limit");
|
||||
|
||||
const fn = authStorage.markProviderExhausted as any;
|
||||
assert.equal(fn.mock.calls.length, 1);
|
||||
assert.equal(fn.mock.calls[0].arguments[0], "zai");
|
||||
assert.equal(fn.mock.calls[0].arguments[1], "rate_limit");
|
||||
});
|
||||
|
||||
it("skips backed-off providers", async () => {
|
||||
const { resolver } = createResolver({
|
||||
isProviderAvailable: (provider: string) => provider !== "alibaba",
|
||||
});
|
||||
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.model.provider, "openai");
|
||||
assert.equal(result!.model.id, "gpt-4.1");
|
||||
});
|
||||
|
||||
it("returns null when all providers are backed off", async () => {
|
||||
const { resolver } = createResolver({
|
||||
isProviderAvailable: () => false,
|
||||
});
|
||||
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
it("returns null when fallback is disabled", async () => {
|
||||
const { resolver } = createResolver({ enabled: false });
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
it("returns null when model is not in any chain", async () => {
|
||||
const { resolver } = createResolver();
|
||||
const unknownModel = createMockModel("unknown", "some-model");
|
||||
const result = await resolver.findFallback(unknownModel, "quota_exhausted");
|
||||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
it("skips providers without auth", async () => {
|
||||
const { resolver } = createResolver({
|
||||
hasAuth: (provider: string) => provider !== "alibaba",
|
||||
});
|
||||
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.model.provider, "openai");
|
||||
});
|
||||
|
||||
it("skips providers with no model in registry", async () => {
|
||||
const { resolver } = createResolver({
|
||||
find: (provider: string, modelId: string) => {
|
||||
if (provider === "alibaba") return undefined;
|
||||
if (provider === "openai" && modelId === "gpt-4.1") return openaiModel;
|
||||
return undefined;
|
||||
},
|
||||
});
|
||||
|
||||
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
|
||||
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.model.provider, "openai");
|
||||
});
|
||||
});
|
||||
|
||||
// ─── checkForRestoration ─────────────────────────────────────────────────────
|
||||
|
||||
describe("FallbackResolver — checkForRestoration", () => {
	// Starting from the alibaba entry, the suite expects the zai/glm-5 entry to be offered.
	it("returns higher-priority provider when recovered", async () => {
		const resolver = createResolver().resolver;
		const restored = await resolver.checkForRestoration(alibabaModel);

		assert.notEqual(restored, null);
		assert.equal(restored!.model.provider, "zai");
		assert.equal(restored!.model.id, "glm-5");
	});

	// Nothing to restore to when the current model is the one the suite treats as top priority.
	it("returns null when already at highest priority", async () => {
		const resolver = createResolver().resolver;
		const restored = await resolver.checkForRestoration(zaiModel);

		assert.equal(restored, null);
	});

	// The zai provider is reported unavailable, so no restoration target should be returned.
	it("returns null when higher-priority provider is still backed off", async () => {
		const resolver = createResolver({
			isProviderAvailable: (provider: string) => provider !== "zai",
		}).resolver;
		const restored = await resolver.checkForRestoration(alibabaModel);

		assert.equal(restored, null);
	});

	// With the feature switched off the resolver must not suggest anything.
	it("returns null when fallback is disabled", async () => {
		const resolver = createResolver({ enabled: false }).resolver;
		const restored = await resolver.checkForRestoration(alibabaModel);

		assert.equal(restored, null);
	});
});
|
||||
|
||||
// ─── getBestAvailable ────────────────────────────────────────────────────────
|
||||
|
||||
describe("FallbackResolver — getBestAvailable", () => {
	// With every provider usable, the "coding" chain is expected to resolve to zai.
	it("returns highest-priority available provider", async () => {
		const resolver = createResolver().resolver;
		const best = await resolver.getBestAvailable("coding");

		assert.notEqual(best, null);
		assert.equal(best!.model.provider, "zai");
	});

	// When zai is reported unavailable, the suite expects alibaba to be chosen instead.
	it("skips backed-off providers", async () => {
		const resolver = createResolver({
			isProviderAvailable: (provider: string) => provider !== "zai",
		}).resolver;
		const best = await resolver.getBestAvailable("coding");

		assert.notEqual(best, null);
		assert.equal(best!.model.provider, "alibaba");
	});

	// A chain name that is not configured yields null.
	it("returns null for unknown chain", async () => {
		const resolver = createResolver().resolver;
		const best = await resolver.getBestAvailable("nonexistent");

		assert.equal(best, null);
	});
});
|
||||
|
||||
// ─── findChainsForModel ──────────────────────────────────────────────────────
|
||||
|
||||
describe("FallbackResolver — findChainsForModel", () => {
	// zai/glm-5 is expected to be a member of exactly the "coding" chain.
	it("finds chains containing a model", () => {
		const resolver = createResolver().resolver;
		const chainNames = resolver.findChainsForModel("zai", "glm-5");

		assert.deepEqual(chainNames, ["coding"]);
	});

	// A provider/model pair that appears nowhere produces an empty list.
	it("returns empty array for model not in any chain", () => {
		const resolver = createResolver().resolver;
		const chainNames = resolver.findChainsForModel("unknown", "model");

		assert.deepEqual(chainNames, []);
	});
});
|
||||
165
packages/pi-coding-agent/src/core/fallback-resolver.ts
Normal file
165
packages/pi-coding-agent/src/core/fallback-resolver.ts
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
// GSD Provider Fallback Resolver
|
||||
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
||||
|
||||
/**
|
||||
* FallbackResolver - Cross-provider fallback when rate/quota limits are hit.
|
||||
*
|
||||
* When a provider's credentials are all exhausted, this resolver finds the next
|
||||
* available provider+model from a user-configured fallback chain. It also handles
|
||||
* restoration: checking if a higher-priority provider has recovered before each request.
|
||||
*/
|
||||
|
||||
import type { Api, Model } from "@gsd/pi-ai";
|
||||
import type { AuthStorage, UsageLimitErrorType } from "./auth-storage.js";
|
||||
import type { ModelRegistry } from "./model-registry.js";
|
||||
import type { FallbackChainEntry, SettingsManager } from "./settings-manager.js";
|
||||
|
||||
/**
 * Outcome of a successful fallback or restoration lookup.
 */
export interface FallbackResult {
	/** The model (as returned by the model registry) that should be used next. */
	model: Model<Api>;
	/** Name of the configured fallback chain the model was selected from. */
	chainName: string;
	/** Human-readable explanation of why this model was selected. */
	reason: string;
}
|
||||
|
||||
export class FallbackResolver {
	constructor(
		private settingsManager: SettingsManager,
		private authStorage: AuthStorage,
		private modelRegistry: ModelRegistry,
	) {}

	/**
	 * Find the next available fallback for a model that just failed.
	 *
	 * The current model's provider is first reported to the auth storage via
	 * `markProviderExhausted`. Every configured chain is then searched for an entry
	 * matching the current model's provider+id; within the first chain that yields a
	 * candidate, entries with lower priority (higher number) are tried first, then the
	 * search wraps around to the entries with higher priority.
	 *
	 * Chains are ordered by their `priority` field before searching, so a chain that
	 * was not written through `setFallbackChain` (e.g. a hand-edited settings file)
	 * is still walked in priority order.
	 *
	 * @param currentModel - The model whose request just hit a usage limit
	 * @param errorType - Classification of the failure, forwarded to `markProviderExhausted`
	 * @returns FallbackResult if a fallback is available, null otherwise (also null when
	 *   fallback is disabled)
	 */
	async findFallback(
		currentModel: Model<Api>,
		errorType: UsageLimitErrorType,
	): Promise<FallbackResult | null> {
		const { enabled, chains } = this.settingsManager.getFallbackSettings();
		if (!enabled) return null;

		// Mark the current provider as exhausted at the provider level
		this.authStorage.markProviderExhausted(currentModel.provider, errorType);

		for (const [chainName, rawEntries] of Object.entries(chains)) {
			const entries = this._sortByPriority(rawEntries);
			const currentIndex = this._indexOfModel(entries, currentModel);
			if (currentIndex === -1) continue;

			// Prefer entries after the current one (lower priority)...
			const next = await this._findAvailableInChain(chainName, entries, currentIndex + 1);
			if (next) return next;

			// ...then wrap around to the entries before it (higher priority).
			const wrapped = await this._findAvailableInChain(chainName, entries, 0, currentIndex);
			if (wrapped) return wrapped;
		}

		return null;
	}

	/**
	 * Check if a higher-priority provider in the chain has recovered.
	 * Called before each LLM request to restore the best available provider.
	 *
	 * Only entries whose priority places them ahead of the current model are
	 * considered; chains are ordered by `priority` before the comparison.
	 *
	 * @param currentModel - The model currently in use
	 * @returns FallbackResult if a better provider is available, null if current is best
	 *   (or fallback is disabled, or the model is in no chain)
	 */
	async checkForRestoration(currentModel: Model<Api>): Promise<FallbackResult | null> {
		const { enabled, chains } = this.settingsManager.getFallbackSettings();
		if (!enabled) return null;

		for (const [chainName, rawEntries] of Object.entries(chains)) {
			const entries = this._sortByPriority(rawEntries);
			const currentIndex = this._indexOfModel(entries, currentModel);

			// Not in this chain, or already at its highest-priority entry.
			if (currentIndex <= 0) continue;

			const better = await this._findAvailableInChain(chainName, entries, 0, currentIndex);
			if (better) {
				return {
					...better,
					reason: `${better.model.provider}/${better.model.id} recovered, restoring from fallback`,
				};
			}
		}

		return null;
	}

	/**
	 * Get the best available model from a named chain.
	 * Useful for initial model selection.
	 *
	 * @param chainName - Key of the chain in the fallback settings
	 * @returns The first available entry in priority order, or null when fallback is
	 *   disabled, the chain is missing/empty, or no entry is currently usable
	 */
	async getBestAvailable(chainName: string): Promise<FallbackResult | null> {
		const { enabled, chains } = this.settingsManager.getFallbackSettings();
		if (!enabled) return null;

		const entries = chains[chainName];
		if (!entries || entries.length === 0) return null;

		return this._findAvailableInChain(chainName, this._sortByPriority(entries), 0);
	}

	/**
	 * Find the chain(s) a model belongs to.
	 *
	 * Note: unlike the lookup methods, this does not consult the `enabled` flag.
	 *
	 * @param provider - Provider name to match against chain entries
	 * @param modelId - Model id to match against chain entries
	 * @returns Names of every chain containing a matching entry (possibly empty)
	 */
	findChainsForModel(provider: string, modelId: string): string[] {
		const { chains } = this.settingsManager.getFallbackSettings();
		const result: string[] = [];

		for (const [chainName, entries] of Object.entries(chains)) {
			if (entries.some((e) => e.provider === provider && e.model === modelId)) {
				result.push(chainName);
			}
		}

		return result;
	}

	/**
	 * Return a copy of a chain ordered by ascending `priority` (lowest number first).
	 * The sort is stable, so an already-ordered chain keeps its exact order; entries
	 * without a finite numeric priority are placed last. The input is never mutated.
	 */
	private _sortByPriority(entries: FallbackChainEntry[]): FallbackChainEntry[] {
		if (!Array.isArray(entries)) return [];

		const rank = (entry: FallbackChainEntry): number =>
			typeof entry.priority === "number" && Number.isFinite(entry.priority)
				? entry.priority
				: Number.POSITIVE_INFINITY;

		return [...entries].sort((a, b) => {
			const left = rank(a);
			const right = rank(b);
			// Explicit comparison avoids NaN from Infinity - Infinity.
			if (left === right) return 0;
			return left < right ? -1 : 1;
		});
	}

	/**
	 * Index of the first entry matching the model's provider+id, or -1 if absent.
	 */
	private _indexOfModel(entries: FallbackChainEntry[], model: Model<Api>): number {
		return entries.findIndex((e) => e.provider === model.provider && e.model === model.id);
	}

	/**
	 * Search a chain for the first available entry starting from startIndex.
	 *
	 * An entry qualifies when, in this order, the auth storage reports its provider
	 * as available, the model registry can resolve the provider/model pair, and the
	 * auth storage reports that the provider has auth.
	 *
	 * @param chainName - Name reported back in the result
	 * @param entries - Chain entries, expected in priority order
	 * @param startIndex - First index to inspect (inclusive)
	 * @param endIndex - Index to stop before (exclusive); defaults to the chain length
	 */
	private async _findAvailableInChain(
		chainName: string,
		entries: FallbackChainEntry[],
		startIndex: number,
		endIndex?: number,
	): Promise<FallbackResult | null> {
		const end = endIndex ?? entries.length;

		for (const entry of entries.slice(startIndex, end)) {
			// Check provider-level backoff
			if (!this.authStorage.isProviderAvailable(entry.provider)) continue;

			// Check if model exists in registry
			const model = this.modelRegistry.find(entry.provider, entry.model);
			if (!model) continue;

			// Check if API key is available
			if (!this.authStorage.hasAuth(entry.provider)) continue;

			return {
				model,
				chainName,
				reason: `falling back to ${entry.provider}/${entry.model}`,
			};
		}

		return null;
	}
}
|
||||
|
|
@ -12,6 +12,7 @@ export {
|
|||
type SessionStats,
|
||||
} from "./agent-session.js";
|
||||
export { type BashExecutorOptions, type BashResult, executeBash, executeBashWithOperations } from "./bash-executor.js";
|
||||
export { FallbackResolver, type FallbackResult } from "./fallback-resolver.js";
|
||||
export type { CompactionResult } from "./compaction/index.js";
|
||||
export { createEventBus, type EventBus, type EventBusController } from "./event-bus.js";
|
||||
|
||||
|
|
|
|||
|
|
@ -68,6 +68,17 @@ export interface TaskIsolationSettings {
|
|||
merge?: "patch" | "branch"; // default: "patch"
|
||||
}
|
||||
|
||||
/**
 * One provider/model pair inside a fallback chain.
 */
export interface FallbackChainEntry {
	/** Provider name the entry refers to. */
	provider: string;
	/** Model id served by that provider. */
	model: string;
	/** Sort key used when a chain is stored; chains are sorted ascending by this value. */
	priority: number;
}
|
||||
|
||||
/**
 * Persisted configuration for cross-provider fallback.
 */
export interface FallbackSettings {
	/** Turns fallback on or off. default: false */
	enabled?: boolean;
	/** Fallback chains, keyed by chain name. */
	chains?: Record<string, FallbackChainEntry[]>;
}
|
||||
|
||||
export type TransportSetting = Transport;
|
||||
|
||||
/**
|
||||
|
|
@ -122,6 +133,7 @@ export interface Settings {
|
|||
async?: AsyncSettings;
|
||||
bashInterceptor?: BashInterceptorSettings;
|
||||
taskIsolation?: TaskIsolationSettings;
|
||||
fallback?: FallbackSettings;
|
||||
}
|
||||
|
||||
/** Deep merge settings: project/overrides take precedence, nested objects merge recursively */
|
||||
|
|
@ -1010,4 +1022,58 @@ export class SettingsManager {
|
|||
getTaskIsolationMerge(): "patch" | "branch" {
	// Use the configured merge strategy, defaulting to "patch" when none is set.
	const configured = this.settings.taskIsolation?.merge;
	return configured ?? "patch";
}
|
||||
|
||||
/** Reads `fallback.enabled` from `this.settings`; false when it is not set. */
getFallbackEnabled(): boolean {
	const fallback = this.settings.fallback;
	return fallback?.enabled ?? false;
}
|
||||
|
||||
/**
 * Sets `fallback.enabled` on the global settings (creating the `fallback`
 * object if needed), marks the field as modified, and saves.
 */
setFallbackEnabled(enabled: boolean): void {
	let fallback = this.globalSettings.fallback;
	if (!fallback) {
		fallback = {};
		this.globalSettings.fallback = fallback;
	}

	fallback.enabled = enabled;
	this.markModified("fallback", "enabled");
	this.save();
}
|
||||
|
||||
/** Returns every configured fallback chain from `this.settings`, or an empty object when none exist. */
getFallbackChains(): Record<string, FallbackChainEntry[]> {
	const chains = this.settings.fallback?.chains;
	return chains ?? {};
}
|
||||
|
||||
/** Looks up a single fallback chain by name in `this.settings`; undefined when it is not configured. */
getFallbackChain(name: string): FallbackChainEntry[] | undefined {
	const chains = this.settings.fallback?.chains;
	return chains?.[name];
}
|
||||
|
||||
/**
 * Stores a fallback chain under `name` in the global settings, creating the
 * `fallback` and `chains` containers if needed. A copy of `entries` sorted by
 * ascending `priority` is stored; the caller's array is left untouched.
 * Marks `fallback` as modified and saves.
 */
setFallbackChain(name: string, entries: FallbackChainEntry[]): void {
	let fallback = this.globalSettings.fallback;
	if (!fallback) {
		fallback = {};
		this.globalSettings.fallback = fallback;
	}

	let chains = fallback.chains;
	if (!chains) {
		chains = {};
		fallback.chains = chains;
	}

	// Sort by priority
	const ordered = entries.slice();
	ordered.sort((left, right) => left.priority - right.priority);
	chains[name] = ordered;

	this.markModified("fallback");
	this.save();
}
|
||||
|
||||
/**
 * Deletes the named chain from the global settings.
 * When the last chain is removed, the `chains` container itself is dropped.
 *
 * @returns true if a chain was removed (and settings were saved), false if no
 *   chain with that name exists in the global settings
 */
removeFallbackChain(name: string): boolean {
	const fallback = this.globalSettings.fallback;
	const chains = fallback?.chains;
	if (!fallback || !chains || !chains[name]) {
		return false;
	}

	delete chains[name];
	const noChainsLeft = Object.keys(chains).length === 0;
	if (noChainsLeft) {
		delete fallback.chains;
	}

	this.markModified("fallback");
	this.save();
	return true;
}
|
||||
|
||||
/** Bundles the results of `getFallbackEnabled()` and `getFallbackChains()` into one object. */
getFallbackSettings(): { enabled: boolean; chains: Record<string, FallbackChainEntry[]> } {
	const enabled = this.getFallbackEnabled();
	const chains = this.getFallbackChains();
	return { enabled, chains };
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2382,6 +2382,24 @@ export class InteractiveMode {
|
|||
this.ui.requestRender();
|
||||
break;
|
||||
}
|
||||
|
||||
case "fallback_provider_switch": {
|
||||
this.showStatus(`Switched from ${event.from} → ${event.to} (${event.reason})`);
|
||||
this.ui.requestRender();
|
||||
break;
|
||||
}
|
||||
|
||||
case "fallback_provider_restored": {
|
||||
this.showStatus(`Restored to ${event.provider}`);
|
||||
this.ui.requestRender();
|
||||
break;
|
||||
}
|
||||
|
||||
case "fallback_chain_exhausted": {
|
||||
this.showError(event.reason);
|
||||
this.ui.requestRender();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue