feat: record retrieval evidence across context tools

This commit is contained in:
Mikael Hugo 2026-05-07 18:17:41 +02:00
parent 05f185256c
commit b0fce94f9e
33 changed files with 1661 additions and 224 deletions

View file

@ -1,14 +1,16 @@
# CLI Agent Code Survey — 2026-05-07
# SF + ACE Full-Stack Reference Survey — 2026-05-07
This record compares the local coding-agent checkouts under `/home/mhugo/code/`
against Forge. It is planning evidence, not an instruction to copy another
product's architecture.
This record compares local coding-agent, orchestration, retrieval, model, and
platform-engineering references under `/home/mhugo/code/` plus selected indexed
public references against the intended SF+ACE full-stack flow. It is planning
evidence, not an instruction to copy another product's architecture.
## Product Boundary
Forge remains the product, and UOK remains the internal execution kernel.
External CLIs are reference implementations used to sharpen Forge, not
destination architectures.
Forge remains the local product/runtime surface, ACE remains the higher-level
workflow/control-plane layer, and UOK remains the internal execution safety
kernel. External systems are reference implementations used to sharpen the
unified SF+ACE flow, not destination architectures.
Hard boundary: Forge must stay an MCP client only. Do not add, restore, or plan
an SF MCP server. External control belongs in daemon, RPC, and headless
@ -38,10 +40,50 @@ Additional coder references:
- `open-codex`
- `letta-code`
- `neovate-code`
- `symphony`
- `singularity/machine` (`codemachine`)
Indexed-only references to include in future passes:
- `kimi-cli` / Kimi Code
- Spec Kit
- upstream CodeMachine CLI (`moazbuilds/CodeMachine-CLI`)
The local `claude-code` checkout is a leaked-source/sourcemap research mirror,
not a clean upstream dependency. Treat it as ergonomics evidence only.
## SF + ACE Full-Stack Reference Map
The long-term target is a unified SF+ACE autonomous software flow, not a
collection of unrelated coding assistants. Compare each repo at the layer where
it is strongest.
| Repo / Tool | Full-Stack Layer | Pattern To Study | Evidence Mode | Safe `sift` Scope |
|---|---|---|---|---|
| `singularity-forge` | Local product/runtime | UOK, DB-first state, CLI/TUI/headless, extension tools, MCP-client-only guardrails | local source + `sift` | `docs/`, `src/resources/extensions/sf/`, `packages/*/src/`, tests |
| `ace-coder` | Workflow/control plane | HTDAG/YAML workflow DAGs, reviewers, quality gates, deployment governance, multi-repo memory | local source + `sift` only | `AGENTS.md`, `CLAUDE.md`, `docs/`, `.agents/skills/`, `python/ai_dev/` first-party modules |
| `symphony` | Work orchestration | Linear polling, isolated per-issue workspaces, `WORKFLOW.md`, Codex app-server, retries, PR review/landing | local source + Context7 `/openai/symphony` | `README.md`, `SPEC.md`, `elixir/WORKFLOW.md`, `elixir/AGENTS.md`, `.codex/skills/` |
| `codemachine` | Multi-agent workflow engine | Engine matrix, SmartRouter, spec-to-code workflow templates, feature flags, tool health | local fork/source + web upstream | `README.md`, `docs/architecture/`, `templates/workflows/`, `prompts/agents/`, `prompts/moderator/` |
| Amplication | Platform/golden paths | Live templates, service catalog, plugin codegen, generated service lifecycle, compliance/drift | web/GitHub; clone before local planning | `docs/`, `packages/*/src/`, plugin/codegen packages if cloned |
| Spec Kit | Spec-driven artifacts | Constitution, scenarios, FR/SC IDs, spec -> plan -> tasks -> analyze -> implement | Context7 `/github/spec-kit` | templates/docs/spec workflows if cloned |
| `plandex` | Large-task implementation | Cumulative diff sandbox, plan versioning, context loading, apply/debug loop | local source + Context7 | `README.md`, `app/cli/lib/`, `app/server/db/`, first-party docs |
| `aider` | Edit loop/context map | Repo-map ranking, edit formats, lint/test repair, benchmark metadata | local source + Context7 | `aider/`, `benchmark/`, `tests/`, docs; avoid generated website data unless needed |
| `Agentless` | Bug repair/evals | Localization -> repair -> patch validation, reproduction tests, reranking | local source | `agentless/fl/`, `agentless/repair/`, `agentless/test/`, benchmark docs |
| SWE-agent/OpenHands | Bug repair/runtime research | issue-to-patch loops, sandbox/runtime harnesses, SWE-bench evaluation | Context7/web or local clone if added | source/docs/evals only when cloned |
| `codex` | Execution substrate | Sandbox profiles, approval policy, app-server protocol, typed events, AGENTS scope | local source + Context7 `/openai/codex` | `docs/`, `codex-rs/protocol/src/`, `codex-rs/exec/src/`, `codex-rs/linux-sandbox/`; avoid `vendor/` |
| `claude-code` | UX reference | Permissions, commands, plugins, MCP client UX, subagent UX | local source only; leaked mirror caveat | `src/commands/`, `src/services/mcp/`, `src/tools/`, `src/components/` |
| `qwen-code` | Terminal workflow | trusted folders, subagent fork design, terminal-capture tests, provider config | local source + Context7 | `docs/`, `packages/*/src/`, `integration-tests/terminal-capture/` |
| Kimi Code | Model-specific coding agent | long-context coding, Kimi CLI/IDE flow, model-plan comparison | Context7 `/moonshotai/kimi-cli` | docs/source if cloned |
| CodeGeeX2 | Model capability | multilingual code model, HumanEval-X/DS1000, local deployment/quantization | web/GitHub | benchmark/evaluation/docs if cloned |
| `gemini-cli` | Provider CLI/testing | release channels, generated schemas/docs, eval promotion, perf/memory tests | local source + Context7 if needed | `docs/`, `evals/`, `perf-tests/`, `memory-tests/`, `packages/*/src/` |
| `opencode` | Mode/schema boundary | plan/build modes, client/server, project-local commands/tools, canonical schema | local source + Context7 | `README.md`, `.opencode/`, `specs/`, `packages/opencode/specs/`, `packages/opencode/src/` |
| `crush` | Local runtime/TUI | SQLite/sqlc, hooks, permissions, LSP, MCP client status, Bubble Tea UI | local source | `internal/db/`, `internal/hooks/`, `internal/permission/`, `internal/agent/tools/`, `internal/ui/` |
| `goose` | Desktop/CLI/API agent | diagnostics, API embedding, provider/extension breadth, MCP client lifecycle | local source | `crates/`, `documentation/`, `ui/desktop/`; do not copy server posture |
| `letta-code` | Long-lived memory | persistent agent memory, approval recovery, skills, channel/remote UX | local source | `src/agent/`, `src/permissions/`, `src/cli/`, `src/tests/` |
| `OpenAgents` | Full-stack multi-agent platform | backend/frontend/agent split, one-agent-one-folder, plugin/data/web agents, adapters | web/GitHub; clone before local planning | `backend/`, `frontend/`, `real_agents/` if cloned |
| Claude Context / Context+ | Code context retrieval | vector-backed semantic code search, MCP-client integration, context cost reduction | Context7/web | code search/indexing packages if cloned |
| `amazon-q-developer-cli` | Rust auth/security | auth, security, workspace patterns, Rust CLI lessons | local source; lower priority | `crates/chat-cli/`, `crates/agent/`, docs |
## Comparison Matrix
| Reference | Strongest Fit For Forge | Borrow | Avoid |
@ -62,6 +104,10 @@ not a clean upstream dependency. Treat it as ergonomics evidence only.
| `neovate-code` | Design-doc and terminal UX iteration | Small design records, queued-message designs, subagent design notes, command/terminal UX records | Pulling in provider-specific branding or immature UX churn |
| `amazon-q-developer-cli` | Rust auth/security reference | Auth/security/workspace patterns and Rust CLI lessons where applicable | Product direction; local README says the open source project is no longer actively maintained |
| `open-codex` | Older/forked approval-mode comparison | Approval-mode vocabulary and provider abstraction history | Fork-specific Chat Completions direction as a primary architecture |
| `symphony` | Work orchestration above individual agents | Issue-tracker polling, per-issue isolated workspaces, repo-owned `WORKFLOW.md`, Codex app-server lifecycle, retries, operator state, CI/PR review and landing loops | High-trust unattended defaults without Forge's UOK gates and DB-first runtime evidence |
| `codemachine` | Multi-agent spec-to-code orchestration | Engine matrix, SmartRouter routing, heterogeneous agents, spec-to-code templates, feature flags, tool health, local workflow examples, upstream repeatable long-running workflow model | Optional MCP-server/tooling posture and Bun-specific implementation assumptions |
| Kimi Code | Long-context model-specific coding agent | Kimi CLI/IDE workflow, long-context coding, subagent-oriented terminal automation, model-plan comparison | Treating provider-specific subscription/API behavior as a Forge architecture |
| Spec Kit | Spec-driven development workflow | Constitution, prioritized user scenarios, acceptance criteria, functional requirements, measurable success criteria, spec -> plan -> tasks -> implement -> analyze loop | Replacing Forge PDD/UOK with a generic spec template instead of mapping useful pieces into PDD fields |
## Forge Already Has
@ -136,6 +182,20 @@ surfaces instead of adding parallel state systems.
- Stop rule: do not implement any SF MCP server, MCP worker backend, or
bundled/re-exported MCP server.
10. **Work orchestration above single agent sessions**
- Use OpenAI Symphony and CodeMachine as references.
- Target Forge surfaces: durable queue/roadmap dispatch, isolated working
directories, issue/task lifecycle state, retry/backoff, per-run
observability, proof-of-work handoff, and CI/PR review/landing loops.
- Stop rule: orchestration must feed UOK and DB-backed state instead of
bypassing Forge's safety gates.
11. **Spec-driven artifact pipeline**
- Use Spec Kit and CodeMachine as references.
   - Target Forge surfaces: intent-to-PDD-field conversion, prioritized slices,
     acceptance criteria, functional requirements, measurable success criteria,
     task generation, and consistency analysis before implementation.
## Priority Order
P0:
@ -155,12 +215,16 @@ P1:
- Add cumulative diff review and evidence metadata.
- Expand UOK evals with Agentless-style localization/repair/validation cases.
- Add MCP client state/status/config hardening without adding any MCP server.
- Add durable orchestration contracts for issue/task queues, isolated workspaces,
retry policy, proof-of-work, and review/landing loops.
P2:
- Improve terminal command discovery and permission UX.
- Generate settings/environment docs from typed schemas.
- Compare memory lifecycle/recovery against Letta and ACE.
- Map Spec Kit scenario/requirement/success-criteria templates into Forge PDD
fields without replacing PDD.
## Evidence Pointers
@ -201,6 +265,10 @@ The follow-up subagent pass inspected these concrete local paths:
- `ace-coder/docs/MCP_SERVER.md`,
`ace-coder/docs/plans/2026-04-05-mcp-daemon-refactor.md`,
`ace-coder/python/ai_dev/mcp/`.
- `symphony/README.md`, `symphony/SPEC.md`, `symphony/elixir/WORKFLOW.md`,
`symphony/elixir/AGENTS.md`, and `.codex/skills/land/SKILL.md`.
- `singularity/machine/README.md`, `package.json`, `templates/workflows/`,
`docs/architecture/engine-matrix.md`, and `docs/OPENAI_SPECS_DOWNLOAD.md`.
## Context7 Cross-Check
@ -226,14 +294,58 @@ snapshot available on this machine.
`/websites/qwenlm_github_io_qwen-code-docs`, and
`/websites/qwenlm_github_io_qwen-code-docs_en`.
- OpenCode: `/anomalyco/opencode`.
- OpenAI Symphony: `/openai/symphony`.
- Kimi Code: `/moonshotai/kimi-cli`,
`/websites/moonshotai_github_io_kimi-cli_en`, and `/websites/kimi_code`.
- Spec Kit: `/github/spec-kit` and `/websites/github_github_io_spec-kit`.
- Upstream CodeMachine CLI did not resolve by name in Context7 during this
pass, but GitHub confirms `https://github.com/moazbuilds/CodeMachine-CLI`
as the public upstream-style repo for CodeMachine CLI. The local checkout
inspected is `https://github.com/singularity-ng/machine.git`, so treat it as
local fork/mirror evidence rather than exact upstream state.
## Local Sift Cross-Check
ACE is private/local and should not be treated as Context7-indexed. Use `sift`
for ACE and Forge when checking private or machine-local architecture.
For dependency hygiene, do not run broad `sift search` over repo roots that may
contain vendored dependencies, package caches, build output, or generated blobs.
This `sift` install does not expose an exclude flag, so scope searches to
first-party paths such as `docs/`, `src/`, `packages/*/src/`, `specs/`,
`AGENTS.md`, `CLAUDE.md`, and known design files. Avoid `node_modules/`,
`vendor/`, `dist/`, `build/`, `target/`, `.venv/`, caches, fixture dumps, and
generated lock/schema/output directories unless the dependency surface itself is
the subject of the question.
The targeted `sift` pass found:
- Codex `codex-rs/protocol/src/config_types.rs` and `protocol.rs`: confirms
first-party typed approval policy and sandbox mode surfaces without searching
`codex-rs/vendor/`.
- OpenCode `packages/opencode/specs/effect/schema.md`: confirms the
schema-first rule to prefer one canonical schema definition and derive
compatibility schemas instead of maintaining parallel sources of truth.
- Aider first-party docs/tests: confirms local repo-map/edit-format/lint/test
and commit behavior surfaces.
- Plandex `README.md`, changelog, and first-party app model files: confirms the
cumulative diff sandbox, controlled command execution, rollback/debug loop,
and planning phases.
- Qwen Code `docs/`: confirms terminal-capture integration tests, trusted
folders documentation, and provider configuration docs.
- RA.Aid first-party docs/source: confirms shell command approval bypass via
`--cowboy-mode`, research/planning agents, and session/logging surfaces.
- Symphony first-party spec/workflow files: confirm issue-tracker polling,
per-issue workspace isolation, repo-owned `WORKFLOW.md`, Codex app-server
lifecycle, max turns/concurrency, retry/backoff, state snapshots, token/rate
observability, PR feedback sweeps, and land-loop skills.
- CodeMachine first-party docs/templates: confirm local multi-agent
orchestration, heterogeneous engine routing, spec-to-code workflow templates,
feature-flag governance, health/status commands, and optional MCP tooling.
GitHub upstream `moazbuilds/CodeMachine-CLI` confirms the public product
framing: repeatable long-running workflows, multi-agent orchestration,
parallel execution, context engineering, and headless scripting of coding
engines such as Claude Code, Codex, Cursor, and others.
- ACE `AGENTS.md`: confirms the repo-local Claude MCP client contract, hard
stops, skills, reviewer workflow, quality gate, and the warning that ACE's
autonomous system uses its own code/YAML workflow DAGs rather than
@ -249,6 +361,28 @@ The targeted `sift` pass found:
- Forge `docs/records/2026-05-07-cli-agent-code-survey.md`: now records the
MCP-client-only product boundary and roadmap pull-through.
## Implementation Follow-Up
The first DB-backed retrieval slice landed with schema v41:
- `retrieval_evidence` records backend, source kind, query, strategy, scope,
project root, git head/branch, worktree dirty flag, freshness, status, hit
count, elapsed time, cache path, error, result metadata, and timestamp.
- `sift_search` and `codebase_search` write retrieval evidence for successful
and failed searches.
- Native Context7 `resolve_library` and `get_library_docs` write docs retrieval
evidence with `freshness=external-index`.
- `search-the-web` writes web retrieval evidence with `freshness=external-live`
for success, cache hits, missing-provider errors, duplicate-loop stops,
budget exhaustion, aborts, and provider failures.
- `sf_retrieval_evidence` exposes the rows through the SF read-only DB tool
surface so agents do not query `.sf/sf.db` directly.
- Sift telemetry now uses the no-op debug logger; telemetry failures no longer
turn successful searches into failed tool calls.
Next slices should wrap `search_and_read` and `fetch_page` results in the same
evidence contract before using them for planning.
## Resulting Direction
Forge should absorb proven patterns into UOK and the existing DB-first runtime:

View file

@ -7,5 +7,5 @@ Repo-memory audits, decision ledgers, context-gardening notes, and records-keepe
| Date | Note | Summary |
|------|------|---------|
| 2026-05-01 | [repo-vcs and notifications](./2026-05-01-repo-vcs-and-notifications.md) | repo-vcs skill landed; notification specs drafted; JSDoc annotations added; placeholder docs filled |
| 2026-05-07 | [cli agent code survey](./2026-05-07-cli-agent-code-survey.md) | compared local CLI agent checkouts plus Context7 cross-checks; priority pulls are execution permissions, typed headless events, DB-first state, trust gating, cumulative diffs, eval pipelines, and MCP-client-only lifecycle hardening |
| 2026-05-07 | [SF + ACE full-stack reference survey](./2026-05-07-cli-agent-code-survey.md) | repo-wise map of coding agents, orchestration systems, retrieval tools, model references, and platform/golden-path systems; priority pulls are execution permissions, typed headless events, DB-first state, trust gating, orchestration, cumulative diffs, eval pipelines, and MCP-client-only lifecycle hardening |
| 2026-05-07 | [strategy alignment](./2026-05-07-strategy-alignment.md) | aligned top-level docs and roadmap framing around Forge as product, UOK as kernel, and external CLIs as sharpening inputs |

View file

@ -32,7 +32,7 @@ Step-by-step setup instructions for every LLM provider SF supports. If you ran t
|----------|-------------|-------------|-------------|
| Anthropic | API key | `ANTHROPIC_API_KEY` | — |
| OpenAI | API key | `OPENAI_API_KEY` | — |
| Google Gemini | Gemini CLI Core auth | — | `~/.gemini/oauth_creds.json` |
| Google Gemini | Gemini CLI Core auth (default) or API key when enabled in config | `GEMINI_API_KEY` | `~/.gemini/oauth_creds.json` |
| OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` |
| Groq | API key | `GROQ_API_KEY` | — |
| xAI | API key | `XAI_API_KEY` | — |
@ -92,8 +92,19 @@ Authenticate there once and let SF reuse the stored auth state.
gemini login
```
SF intentionally ignores ambient `GEMINI_API_KEY` and
`GOOGLE_GENERATIVE_AI_API_KEY` values for Forge runtime selection.
By default, Forge ignores ambient `GEMINI_API_KEY` and
`GOOGLE_GENERATIVE_AI_API_KEY` values for runtime selection. To let the direct
`google` provider use env auth, enable it in config:
```json
{
"providerEnvAuth": {
"providers": {
"google": "on"
}
}
}
```
### OpenRouter

View file

@ -8,7 +8,7 @@ Step-by-step setup instructions for every LLM provider SF supports. If you ran t
|----------|-------------|---------------------|
| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` |
| OpenAI | API key | `OPENAI_API_KEY` |
| Google Gemini | Gemini CLI Core auth | `~/.gemini/oauth_creds.json` |
| Google Gemini | Gemini CLI Core auth (default) or API key when enabled in config | `GEMINI_API_KEY` |
| OpenRouter | API key | `OPENROUTER_API_KEY` |
| Groq | API key | `GROQ_API_KEY` |
| xAI (Grok) | API key | `XAI_API_KEY` |
@ -58,8 +58,18 @@ Authenticate Gemini CLI Core once and let SF reuse that state:
gemini login
```
SF intentionally ignores `GEMINI_API_KEY` and `GOOGLE_GENERATIVE_AI_API_KEY`
for Forge runtime selection.
Forge ignores `GEMINI_API_KEY` and `GOOGLE_GENERATIVE_AI_API_KEY` by default.
To let the direct `google` provider use env auth, enable it in config:
```json
{
"providerEnvAuth": {
"providers": {
"google": "on"
}
}
}
```
### OpenRouter

View file

@ -18,8 +18,8 @@
|----------|----------|
| `ANTHROPIC_API_KEY` | Anthropic (Claude) |
| `OPENAI_API_KEY` | OpenAI |
| `GEMINI_API_KEY` | Google Gemini (ignored by Forge runtime; Gemini CLI Core auth is used instead) |
| `GOOGLE_GENERATIVE_AI_API_KEY` | Google Gemini alias (ignored by Forge runtime) |
| `GEMINI_API_KEY` | Google Gemini (available to the direct `google` provider, but disabled by default via `providerEnvAuth`) |
| `GOOGLE_GENERATIVE_AI_API_KEY` | Google Gemini alias (same policy as `GEMINI_API_KEY`) |
| `OPENROUTER_API_KEY` | OpenRouter |
| `GROQ_API_KEY` | Groq |
| `XAI_API_KEY` | xAI (Grok) |

View file

@ -3,7 +3,7 @@ import { describe, it } from "vitest";
import { getEnvApiKey } from "./env-api-keys.js";
describe("getEnvApiKey", () => {
it("ignores GEMINI_API_KEY for google when present", () => {
it("uses GEMINI_API_KEY for google when present", () => {
const savedGemini = process.env.GEMINI_API_KEY;
const savedGoogleGenerative = process.env.GOOGLE_GENERATIVE_AI_API_KEY;
@ -11,8 +11,7 @@ describe("getEnvApiKey", () => {
process.env.GOOGLE_GENERATIVE_AI_API_KEY = "google-generative-key";
try {
assert.equal(getEnvApiKey("google"), undefined);
assert.equal(getEnvApiKey("google-gemini-cli"), undefined);
assert.equal(getEnvApiKey("google"), "gemini-key");
} finally {
if (savedGemini === undefined) delete process.env.GEMINI_API_KEY;
else process.env.GEMINI_API_KEY = savedGemini;
@ -22,7 +21,7 @@ describe("getEnvApiKey", () => {
}
});
it("ignores GOOGLE_GENERATIVE_AI_API_KEY for google", () => {
it("accepts GOOGLE_GENERATIVE_AI_API_KEY for google", () => {
const savedGemini = process.env.GEMINI_API_KEY;
const savedGoogleGenerative = process.env.GOOGLE_GENERATIVE_AI_API_KEY;
@ -30,7 +29,7 @@ describe("getEnvApiKey", () => {
process.env.GOOGLE_GENERATIVE_AI_API_KEY = "google-generative-key";
try {
assert.equal(getEnvApiKey("google"), undefined);
assert.equal(getEnvApiKey("google"), "google-generative-key");
} finally {
if (savedGemini === undefined) delete process.env.GEMINI_API_KEY;
else process.env.GEMINI_API_KEY = savedGemini;

View file

@ -73,13 +73,6 @@ function hasVertexAdcCredentials(): boolean {
export function getEnvApiKey(provider: KnownProvider): string | undefined;
export function getEnvApiKey(provider: string): string | undefined;
export function getEnvApiKey(provider: any): string | undefined {
// Forge routes Gemini-family models through google-gemini-cli, which owns
// auth via Gemini CLI Core state. Intentionally ignore Google API-key env vars
// here so ambient GEMINI_API_KEY values do not change provider selection.
if (provider === "google" || provider === "google-gemini-cli") {
return undefined;
}
// Fall back to environment variables
if (provider === "github-copilot") {
return (
@ -161,6 +154,7 @@ export function getEnvApiKey(provider: any): string | undefined {
const envMap: Record<string, string | string[]> = {
openai: "OPENAI_API_KEY",
"azure-openai-responses": "AZURE_OPENAI_API_KEY",
google: ["GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"],
groq: "GROQ_API_KEY",
cerebras: "CEREBRAS_API_KEY",
xai: "XAI_API_KEY",

View file

@ -37,10 +37,6 @@ function hasVertexAdcCredentials(): boolean {
export function getEnvApiKey(provider: KnownProvider): string | undefined;
export function getEnvApiKey(provider: string): string | undefined;
export function getEnvApiKey(provider: string): string | undefined {
if (provider === "google" || provider === "google-gemini-cli") {
return undefined;
}
if (provider === "github-copilot") {
return (
process.env.COPILOT_GITHUB_TOKEN ||
@ -93,6 +89,7 @@ export function getEnvApiKey(provider: string): string | undefined {
const envMap: Record<string, string | string[]> = {
openai: "OPENAI_API_KEY",
"azure-openai-responses": "AZURE_OPENAI_API_KEY",
google: ["GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"],
groq: "GROQ_API_KEY",
cerebras: "CEREBRAS_API_KEY",
xai: "XAI_API_KEY",

View file

@ -32,6 +32,7 @@ import { getAgentDir } from "../config.js";
import { AUTH_LOCK_STALE_MS } from "./constants.js";
import { acquireLockAsync, acquireLockSyncWithRetry } from "./lock-utils.js";
import { resolveConfigValueAsync } from "./resolve-config-value.js";
import type { ProviderEnvAuthMode } from "./settings-manager.js";
export type ApiKeyCredential = {
type: "api_key";
@ -266,6 +267,7 @@ export class AuthStorage {
private data: AuthStorageData = {};
private runtimeOverrides: Map<string, string> = new Map();
private fallbackResolver?: (provider: string) => string | undefined;
private envAuthModeResolver?: (provider: string) => ProviderEnvAuthMode;
private loadError: Error | null = null;
private errors: Error[] = [];
private credentialChangeListeners: Set<() => void> = new Set();
@ -337,6 +339,12 @@ export class AuthStorage {
this.fallbackResolver = resolver;
}
setEnvAuthModeResolver(
resolver: (provider: string) => ProviderEnvAuthMode,
): void {
this.envAuthModeResolver = resolver;
}
/**
* Register a callback to be notified when credentials change (e.g., after OAuth token refresh).
* Returns a function to unregister the listener.
@ -500,7 +508,7 @@ export class AuthStorage {
hasAuth(provider: string): boolean {
if (this.runtimeOverrides.has(provider)) return true;
if (this.data[provider]) return true;
if (getEnvApiKey(provider)) return true;
if (this.getConfiguredEnvApiKey(provider)) return true;
if (this.fallbackResolver?.(provider)) return true;
return false;
}
@ -982,9 +990,8 @@ export class AuthStorage {
// All credentials backed off or unresolvable - fall through to env/fallback
}
// Fall back to environment variable. Gemini-family providers intentionally
// ignore ambient GEMINI_API_KEY values via getEnvApiKey().
const envKey = getEnvApiKey(providerId);
// Fall back to environment variable when provider policy allows it.
const envKey = this.getConfiguredEnvApiKey(providerId);
if (envKey) {
// Block Google OAuth tokens from environment variables (e.g., GEMINI_API_KEY=ya29.*)
if (
@ -1007,6 +1014,16 @@ export class AuthStorage {
return this.fallbackResolver?.(providerId) ?? undefined;
}
private getConfiguredEnvApiKey(provider: string): string | undefined {
const mode =
this.envAuthModeResolver?.(provider) ??
(provider === "google" || provider === "google-gemini-cli"
? "off"
: "auto");
if (mode === "off") return undefined;
return getEnvApiKey(provider);
}
/**
* Get all registered OAuth providers
*/

View file

@ -82,14 +82,13 @@ function createResolver(overrides?: {
// ─── findFallback ────────────────────────────────────────────────────────────
describe("FallbackResolver — findFallback", () => {
it("returns next available provider when current fails", async () => {
it("reselects from the current available models when current fails", async () => {
const { resolver } = createResolver();
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
assert.notEqual(result, null);
assert.equal(result!.model.provider, "alibaba");
assert.equal(result!.model.id, "glm-5");
assert.equal(result!.chainName, "coding");
assert.equal(result!.chainName, "fresh-selection");
});
it("marks current provider as exhausted for rate_limit errors", async () => {
@ -142,12 +141,12 @@ describe("FallbackResolver — findFallback", () => {
assert.equal(result, null);
});
it("falls back to free selection when model is not in any chain", async () => {
it("reselects from scratch when model is not in any chain", async () => {
const { resolver } = createResolver();
const unknownModel = createMockModel("unknown", "some-model");
const result = await resolver.findFallback(unknownModel, "quota_exhausted");
assert.notEqual(result, null);
assert.equal(result!.chainName, "free-selection");
assert.equal(result!.chainName, "fresh-selection");
// Should pick an available model with different provider
assert.notEqual(result!.model.provider, "unknown");
});
@ -208,11 +207,7 @@ describe("FallbackResolver — findFallback", () => {
it("skips providers with no model in registry", async () => {
const { resolver } = createResolver({
find: (provider: string, modelId: string) => {
if (provider === "alibaba") return undefined;
if (provider === "openai" && modelId === "gpt-4.1") return openaiModel;
return undefined;
},
getAvailable: () => [openaiModel],
});
const result = await resolver.findFallback(zaiModel, "quota_exhausted");
@ -225,33 +220,9 @@ describe("FallbackResolver — findFallback", () => {
// ─── checkForRestoration ─────────────────────────────────────────────────────
describe("FallbackResolver — checkForRestoration", () => {
it("returns higher-priority provider when recovered", async () => {
it("returns null because restoration is disabled", async () => {
const { resolver } = createResolver();
const result = await resolver.checkForRestoration(alibabaModel);
assert.notEqual(result, null);
assert.equal(result!.model.provider, "zai");
assert.equal(result!.model.id, "glm-5");
});
it("returns null when already at highest priority", async () => {
const { resolver } = createResolver();
const result = await resolver.checkForRestoration(zaiModel);
assert.equal(result, null);
});
it("returns null when higher-priority provider is still backed off", async () => {
const { resolver } = createResolver({
isProviderAvailable: (provider: string) => provider !== "zai",
});
const result = await resolver.checkForRestoration(alibabaModel);
assert.equal(result, null);
});
it("returns null when fallback is disabled", async () => {
const { resolver } = createResolver({ enabled: false });
const result = await resolver.checkForRestoration(alibabaModel);
assert.equal(result, null);
});
});

View file

@ -2,11 +2,10 @@
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
/**
* FallbackResolver - Cross-provider fallback when rate/quota limits are hit.
* FallbackResolver - Fresh model reselection when rate/quota limits are hit.
*
* When a provider's credentials are all exhausted, this resolver finds the next
* available provider+model from a user-configured fallback chain. It also handles
* restoration: checking if a higher-priority provider has recovered before each request.
* When a provider/model becomes unhealthy, this resolver picks a fresh model from
* the current available registry rather than walking a preconfigured fallback chain.
*/
import type { Api, Model } from "@singularity-forge/pi-ai";
@ -31,20 +30,16 @@ export class FallbackResolver {
) {}
/**
* Find the next available fallback for a model that just failed.
* Searches all chains for entries matching the current model's provider+id,
* then returns the next available entry with lower priority (higher number).
* Find a fresh replacement for a model that just failed.
* Ignores fallback chains and reselects from the current available registry.
*
* If no chain contains the current model, falls through to free selection:
* picks any available model from the registry with a different provider.
*
* @returns FallbackResult if a fallback is available, null otherwise
* @returns FallbackResult if a replacement is available, null otherwise
*/
async findFallback(
currentModel: Model<Api>,
errorType: UsageLimitErrorType,
): Promise<FallbackResult | null> {
const { enabled, chains } = this.settingsManager.getFallbackSettings();
const { enabled } = this.settingsManager.getFallbackSettings();
if (!enabled) return null;
// Mark the current provider as exhausted at the provider level.
@ -55,75 +50,16 @@ export class FallbackResolver {
this.authStorage.markProviderExhausted(currentModel.provider, errorType);
}
// Search all chains for one containing the current model
for (const [chainName, entries] of Object.entries(chains)) {
const currentIndex = entries.findIndex(
(e) =>
e.provider === currentModel.provider && e.model === currentModel.id,
);
if (currentIndex === -1) continue;
// Try entries after the current one (already sorted by priority)
const result = await this._findAvailableInChain(
chainName,
entries,
currentIndex + 1,
);
if (result) return result;
// Wrap around: try entries before the current one
const wrapResult = await this._findAvailableInChain(
chainName,
entries,
0,
currentIndex,
);
if (wrapResult) return wrapResult;
}
// No chain contained the current model — fall through to free selection
// from any available model in the registry with a different provider.
return this._findAnyAvailableFallback(currentModel);
}
/**
* Check if a higher-priority provider in the chain has recovered.
* Called before each LLM request to restore the best available provider.
*
* @returns FallbackResult if a better provider is available, null if current is best
* Automatic restoration is disabled when replacement is always reselected
* from scratch instead of following a chain.
*/
async checkForRestoration(
currentModel: Model<Api>,
_currentModel: Model<Api>,
): Promise<FallbackResult | null> {
const { enabled, chains } = this.settingsManager.getFallbackSettings();
if (!enabled) return null;
for (const [chainName, entries] of Object.entries(chains)) {
const currentIndex = entries.findIndex(
(e) =>
e.provider === currentModel.provider && e.model === currentModel.id,
);
if (currentIndex === -1) continue;
// Only check entries with higher priority (lower index = higher priority)
if (currentIndex === 0) continue; // Already at highest priority
const result = await this._findAvailableInChain(
chainName,
entries,
0,
currentIndex,
);
if (result) {
return {
...result,
reason: `${result.model.provider}/${result.model.id} recovered, restoring from fallback`,
};
}
}
return null;
}
@ -227,8 +163,8 @@ export class FallbackResolver {
const chosen = candidates[0];
return {
model: chosen,
chainName: "free-selection",
reason: `free fallback to ${chosen.provider}/${chosen.id} (no chain configured)`,
chainName: "fresh-selection",
reason: `reselected ${chosen.provider}/${chosen.id} from available models`,
};
}
}

View file

@ -2,6 +2,7 @@ import assert from "node:assert/strict";
import { describe, it } from "vitest";
import type { AuthStorage } from "./auth-storage.js";
import { ModelRegistry } from "./model-registry.js";
import { type Settings, SettingsManager } from "./settings-manager.js";
function createRegistryWithCapturedResolver() {
let capturedResolver: ((provider: string) => string | undefined) | undefined;
@ -11,6 +12,7 @@ function createRegistryWithCapturedResolver() {
) => {
capturedResolver = resolver;
},
setEnvAuthModeResolver: () => {},
onCredentialChange: () => {},
getOAuthProviders: () => [],
get: () => undefined,
@ -26,6 +28,29 @@ function createRegistryWithCapturedResolver() {
return capturedResolver!;
}
function createRegistryWithSettingsAndCapturedResolver(
settings: Partial<Settings>,
) {
let capturedResolver: ((provider: string) => string | undefined) | undefined;
const authStorage = {
setFallbackResolver: (
resolver: (provider: string) => string | undefined,
) => {
capturedResolver = resolver;
},
setEnvAuthModeResolver: () => {},
onCredentialChange: () => {},
getOAuthProviders: () => [],
get: () => undefined,
hasAuth: () => false,
getApiKey: async () => undefined,
} as unknown as AuthStorage;
new ModelRegistry(authStorage, undefined, SettingsManager.inMemory(settings));
assert.ok(capturedResolver);
return capturedResolver!;
}
describe("ModelRegistry env fallback resolver (#3782)", () => {
it("falls back to built-in provider env vars when models.json has no custom key", () => {
const prev = process.env.MINIMAX_API_KEY;
@ -61,4 +86,38 @@ describe("ModelRegistry env fallback resolver (#3782)", () => {
}
}
});
it("disables google env fallback by default", () => {
const prev = process.env.GEMINI_API_KEY;
process.env.GEMINI_API_KEY = "gemini-env-test-key";
try {
const resolver = createRegistryWithSettingsAndCapturedResolver({});
assert.equal(resolver("google"), undefined);
} finally {
if (prev === undefined) {
delete process.env.GEMINI_API_KEY;
} else {
process.env.GEMINI_API_KEY = prev;
}
}
});
it("allows provider env fallback when providerEnvAuth is on", () => {
const prev = process.env.GEMINI_API_KEY;
process.env.GEMINI_API_KEY = "gemini-env-test-key";
try {
const resolver = createRegistryWithSettingsAndCapturedResolver({
providerEnvAuth: { providers: { google: "on" } },
});
assert.equal(resolver("google"), "gemini-env-test-key");
} finally {
if (prev === undefined) {
delete process.env.GEMINI_API_KEY;
} else {
process.env.GEMINI_API_KEY = prev;
}
}
});
});

View file

@ -486,6 +486,19 @@ export class ModelRegistry {
discoveryCache?: ModelDiscoveryCache,
) {
this.discoveryCache = discoveryCache ?? new ModelDiscoveryCache();
(
this.authStorage as {
setEnvAuthModeResolver?: (
resolver: (provider: string) => string,
) => void;
}
).setEnvAuthModeResolver?.(
(provider) =>
this.settingsManager?.getProviderEnvAuthMode(provider) ??
(provider === "google" || provider === "google-gemini-cli"
? "off"
: "auto"),
);
// Set up fallback resolver for custom provider API keys
this.authStorage.setFallbackResolver((provider) => {
@ -493,6 +506,14 @@ export class ModelRegistry {
if (keyConfig) {
return resolveConfigValue(keyConfig);
}
if (
(this.settingsManager?.getProviderEnvAuthMode(provider) ??
(provider === "google" || provider === "google-gemini-cli"
? "off"
: "auto")) === "off"
) {
return undefined;
}
return getEnvApiKey(provider);
});

View file

@ -206,8 +206,8 @@ export class RetryHandler {
}
}
// Cross-provider fallback — for rate limits with all creds backed off,
// quota errors, or auth errors (invalid/expired key — no point retrying).
// Fresh model reselection — for rate limits, quota errors, or auth errors
// once the same-model retry budget has been meaningfully exercised.
const isAuthError = errorType === "auth_error";
if (isRateLimit || isQuotaError || isAuthError) {
// For quota errors with a retry-after hint, wait before switching providers.
@ -260,67 +260,39 @@ export class RetryHandler {
return true;
}
}
const fallbackResult = await this._deps.fallbackResolver.findFallback(
this._deps.getModel()!,
errorType,
);
if (fallbackResult) {
const previousProvider = this._deps.getModel()!.provider;
this._deps.agent.setModel(fallbackResult.model);
this._deps.onModelChange(fallbackResult.model);
this._removeLastAssistantError();
this._deps.emit({
type: "fallback_provider_switch",
from: `${previousProvider}/${this._deps.getModel()?.id}`,
to: `${fallbackResult.model.provider}/${fallbackResult.model.id}`,
reason: fallbackResult.reason,
});
this._deps.emit({
type: "auto_retry_start",
attempt: this._retryAttempt + 1,
maxAttempts: settings.maxRetries,
delayMs: 0,
errorMessage: `${message.errorMessage} (${fallbackResult.reason})`,
});
// Retry immediately with fallback provider - don't increment _retryAttempt
this._scheduleContinue(retryGeneration);
return true;
}
// No fallback available either
if (isQuotaError) {
// Try long-context model downgrade ([1m] → base) before giving up
const downgraded = this._tryLongContextDowngrade(
const provider = this._deps.getModel()!.provider;
const authMode = this._deps.modelRegistry.getProviderAuthMode(provider);
const shouldReselectImmediately =
isQuotaError ||
isAuthError ||
this._isCapacityError(message.errorMessage) ||
(isRateLimit && authMode === "externalCli");
if (shouldReselectImmediately) {
return this._tryFreshModelSelection(
message,
errorType,
retryGeneration,
);
if (downgraded) return true;
this._deps.emit({
type: "fallback_chain_exhausted",
reason: `All providers exhausted for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`,
});
this._deps.emit({
type: "auto_retry_end",
success: false,
attempt: this._retryAttempt,
finalError: message.errorMessage,
});
this._retryAttempt = 0;
this._resolveRetry();
return false;
}
}
}
this._retryAttempt++;
const errorType = message.errorMessage
? this._classifyErrorType(message.errorMessage)
: "unknown";
const isRateLimit = errorType === "rate_limit";
const isQuotaError = errorType === "quota_exhausted";
const isAuthError = errorType === "auth_error";
const reselectionThreshold = Math.min(settings.maxRetries, 3);
if (
(isRateLimit || isQuotaError || isAuthError) &&
this._retryAttempt >= reselectionThreshold
) {
return this._tryFreshModelSelection(message, errorType, retryGeneration);
}
if (this._retryAttempt > settings.maxRetries) {
this._deps.emit({
type: "auto_retry_end",
@ -515,6 +487,72 @@ export class RetryHandler {
return "unknown";
}
private _isCapacityError(errorMessage: string): boolean {
return /no capacity|capacity.*available|server.*busy|too busy/i.test(
errorMessage,
);
}
  /**
   * Swap to a freshly selected replacement model after a usage-limit error.
   *
   * Asks the fallback resolver for a replacement, and when one exists swaps
   * the agent's model, clears the trailing assistant error, emits the switch
   * and retry-start events, and schedules an immediate continue.
   *
   * @param message - The assistant error message that triggered reselection.
   * @param errorType - Classified error kind (rate limit, quota, auth, ...).
   * @param retryGeneration - Generation token guarding the scheduled continue.
   * @returns true when a replacement (or long-context downgrade) was
   *   scheduled; false when no recovery path remains.
   */
  private async _tryFreshModelSelection(
    message: AssistantMessage,
    errorType: UsageLimitErrorType,
    retryGeneration: number,
  ): Promise<boolean> {
    const replacement = await this._deps.fallbackResolver.findFallback(
      this._deps.getModel()!,
      errorType,
    );
    if (replacement) {
      // Capture the outgoing model before setModel mutates the agent state,
      // so the switch event reports an accurate "from" value.
      const previousModel = this._deps.getModel()!;
      this._deps.agent.setModel(replacement.model);
      this._deps.onModelChange(replacement.model);
      this._removeLastAssistantError();
      this._deps.emit({
        type: "fallback_provider_switch",
        from: `${previousModel.provider}/${previousModel.id}`,
        to: `${replacement.model.provider}/${replacement.model.id}`,
        reason: replacement.reason,
      });
      this._deps.emit({
        type: "auto_retry_start",
        // Reported attempt is clamped to at least 1 for display purposes.
        attempt: Math.max(this._retryAttempt, 1),
        maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries,
        delayMs: 0,
        errorMessage: `${message.errorMessage} (${replacement.reason})`,
      });
      // Retry immediately on the replacement; _retryAttempt is not advanced.
      this._scheduleContinue(retryGeneration);
      return true;
    }
    if (errorType === "quota_exhausted") {
      // Last resort for quota errors: try dropping a long-context variant
      // back to its base model before declaring the run unrecoverable.
      const downgraded = this._tryLongContextDowngrade(
        message,
        retryGeneration,
      );
      if (downgraded) return true;
      this._deps.emit({
        type: "fallback_chain_exhausted",
        reason: `No replacement model available for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`,
      });
      this._deps.emit({
        type: "auto_retry_end",
        success: false,
        attempt: this._retryAttempt,
        finalError: message.errorMessage,
      });
      // Reset retry state and release any awaiting caller.
      this._retryAttempt = 0;
      this._resolveRetry();
      return false;
    }
    // Non-quota errors without a replacement: let the caller's normal retry
    // accounting continue.
    return false;
  }
/**
* Attempt a same-model retry by reducing maxTokens when provider reports
* an affordability cap (e.g., "can only afford 329").

View file

@ -1002,7 +1002,9 @@ export class SettingsManager {
return (
this.settings.providerEnvAuth?.providers?.[provider] ??
this.settings.providerEnvAuth?.default ??
"auto"
(provider === "google" || provider === "google-gemini-cli"
? "off"
: "auto")
);
}

View file

@ -13,7 +13,7 @@ import type { ModelRegistry } from "../../../core/model-registry.js";
import type { SettingsManager } from "../../../core/settings-manager.js";
import { theme } from "../theme/theme.js";
import { DynamicBorder } from "./dynamic-border.js";
import { keyHint } from "./keybinding-hints.js";
import { keyHint, rawKeyHint } from "./keybinding-hints.js";
/** Display names for providers in the model selector UI. */
const PROVIDER_DISPLAY_NAMES: Record<string, string> = {
@ -348,7 +348,7 @@ export class ModelSelectorComponent extends Container implements Focusable {
return (
keyHint("tab", "scope") +
theme.fg("muted", " (all/scoped) ") +
keyHint("d", "disable")
rawKeyHint("d", "disable")
);
}

View file

@ -29,6 +29,7 @@ import {
truncateHead,
} from "@singularity-forge/pi-coding-agent";
import { Text } from "@singularity-forge/pi-tui";
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";
// ─── In-session cache ─────────────────────────────────────────────────────────
// Keyed by lowercased query string
@ -133,9 +134,33 @@ export default function (pi) {
),
}),
async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
const startedAt = Date.now();
const projectRoot = process.cwd();
const cacheKey = params.libraryName.toLowerCase().trim();
if (searchCache.has(cacheKey)) {
const cached = searchCache.get(cacheKey);
await recordRetrievalEvidence(projectRoot, {
backend: "context7",
sourceKind: "docs",
query: params.query
? `${params.libraryName} ${params.query}`
: params.libraryName,
strategy: "library-search",
scope: params.libraryName,
freshness: "external-index",
status: "ok",
hitCount: cached.length,
elapsedMs: Date.now() - startedAt,
result: {
cached: true,
libraries: cached.map((lib) => ({
id: lib.id,
title: lib.title,
trustScore: lib.trustScore,
benchmarkScore: lib.benchmarkScore,
})),
},
});
return {
content: [
{
@ -159,6 +184,20 @@ export default function (pi) {
libs = Array.isArray(data?.results) ? data.results : [];
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
await recordRetrievalEvidence(projectRoot, {
backend: "context7",
sourceKind: "docs",
query: params.query
? `${params.libraryName} ${params.query}`
: params.libraryName,
strategy: "library-search",
scope: params.libraryName,
freshness: "external-index",
status: "error",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: msg,
});
return {
content: [{ type: "text", text: `Context7 search failed: ${msg}` }],
isError: true,
@ -171,6 +210,28 @@ export default function (pi) {
};
}
searchCache.set(cacheKey, libs);
await recordRetrievalEvidence(projectRoot, {
backend: "context7",
sourceKind: "docs",
query: params.query
? `${params.libraryName} ${params.query}`
: params.libraryName,
strategy: "library-search",
scope: params.libraryName,
freshness: "external-index",
status: "ok",
hitCount: libs.length,
elapsedMs: Date.now() - startedAt,
result: {
cached: false,
libraries: libs.map((lib) => ({
id: lib.id,
title: lib.title,
trustScore: lib.trustScore,
benchmarkScore: lib.benchmarkScore,
})),
},
});
return {
content: [
{ type: "text", text: formatLibraryList(libs, params.libraryName) },
@ -246,6 +307,8 @@ export default function (pi) {
),
}),
async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
const startedAt = Date.now();
const projectRoot = process.cwd();
const tokens = Math.min(Math.max(params.tokens ?? 5000, 500), 10000);
// Strip accidental leading @ that some models inject
const libraryId = params.libraryId.startsWith("@")
@ -255,6 +318,22 @@ export default function (pi) {
const cacheKey = `${libraryId}::${query ?? ""}::${tokens}`;
if (docCache.has(cacheKey)) {
const cached = docCache.get(cacheKey);
await recordRetrievalEvidence(projectRoot, {
backend: "context7",
sourceKind: "docs",
query: query ?? "",
strategy: "docs-fetch",
scope: libraryId,
freshness: "external-index",
status: "ok",
hitCount: cached.trim() ? 1 : 0,
elapsedMs: Date.now() - startedAt,
result: {
cached: true,
tokens,
charCount: cached.length,
},
});
return {
content: [{ type: "text", text: cached }],
details: {
@ -276,6 +355,19 @@ export default function (pi) {
rawText = await apiFetchText(url.toString(), signal);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
await recordRetrievalEvidence(projectRoot, {
backend: "context7",
sourceKind: "docs",
query: query ?? "",
strategy: "docs-fetch",
scope: libraryId,
freshness: "external-index",
status: "error",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: msg,
result: { tokens },
});
return {
content: [
{ type: "text", text: `Context7 doc fetch failed: ${msg}` },
@ -296,6 +388,18 @@ export default function (pi) {
const notFound = query
? `No documentation found for "${query}" in ${libraryId}. Try a broader query or different library ID.`
: `No documentation found for ${libraryId}. Try resolve_library to verify the library ID.`;
await recordRetrievalEvidence(projectRoot, {
backend: "context7",
sourceKind: "docs",
query: query ?? "",
strategy: "docs-fetch",
scope: libraryId,
freshness: "external-index",
status: "empty",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
result: { tokens },
});
return {
content: [{ type: "text", text: notFound }],
details: {
@ -321,6 +425,23 @@ export default function (pi) {
` Use a more specific query to reduce output size.]`;
}
docCache.set(cacheKey, finalText);
await recordRetrievalEvidence(projectRoot, {
backend: "context7",
sourceKind: "docs",
query: query ?? "",
strategy: "docs-fetch",
scope: libraryId,
freshness: "external-index",
status: "ok",
hitCount: 1,
elapsedMs: Date.now() - startedAt,
result: {
cached: false,
tokens,
truncated: truncation.truncated,
charCount: finalText.length,
},
});
return {
content: [{ type: "text", text: finalText }],
details: {

View file

@ -18,6 +18,7 @@ import {
truncateHead,
} from "@singularity-forge/pi-coding-agent";
import { Text } from "@singularity-forge/pi-tui";
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";
import { LRUTTLCache } from "./cache.js";
import { formatSearchResults } from "./format.js";
import { classifyError, fetchWithRetry, fetchWithRetryTimed } from "./http.js";
@ -58,6 +59,21 @@ export function resetSearchLoopGuardState() {
consecutiveDupeCount = 0;
sessionTotalSearches = 0;
}
/** Reduce raw search hits to the compact shape stored as retrieval evidence. */
function summarizeWebResults(results) {
  const summaries = [];
  for (const hit of results) {
    summaries.push({ title: hit.title, url: hit.url, age: hit.age });
  }
  return summaries;
}
/**
 * Record one web-search retrieval event with the tool's fixed provenance.
 * Per-call fields in `entry` override these defaults because they are spread
 * last.
 */
async function recordWebSearchEvidence(projectRoot, entry) {
  const webDefaults = {
    backend: "search-the-web",
    sourceKind: "web",
    freshness: "external-live",
  };
  await recordRetrievalEvidence(projectRoot, { ...webDefaults, ...entry });
}
// Summarizer responses: max 50 entries, 15-minute TTL
const summarizerCache = new LRUTTLCache({ max: 50, ttlMs: 900_000 });
// =============================================================================
@ -575,7 +591,18 @@ export function registerSearchTool(pi) {
),
}),
async execute(_toolCallId, params, signal, onUpdate, _ctx) {
const projectRoot = process.cwd();
const startedAt = Date.now();
if (signal?.aborted) {
await recordWebSearchEvidence(projectRoot, {
query: params.query ?? "",
strategy: "aborted",
scope: params.domain ?? "",
status: "aborted",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: "Search cancelled",
});
return {
content: [{ type: "text", text: "Search cancelled." }],
details: undefined,
@ -586,6 +613,15 @@ export function registerSearchTool(pi) {
// ------------------------------------------------------------------
const provider = resolveSearchProvider();
if (!provider) {
await recordWebSearchEvidence(projectRoot, {
query: params.query,
strategy: "none",
scope: params.domain ?? "",
status: "error",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: "No search API key set",
});
return {
content: [
{
@ -604,6 +640,19 @@ export function registerSearchTool(pi) {
// Session-level search budget
// ------------------------------------------------------------------
if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) {
await recordWebSearchEvidence(projectRoot, {
query: params.query,
strategy: provider,
scope: params.domain ?? "",
status: "budget_exhausted",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})`,
result: {
sessionTotalSearches,
maxSearches: MAX_SEARCHES_PER_SESSION,
},
});
return {
content: [
{
@ -661,6 +710,16 @@ export function registerSearchTool(pi) {
if (cacheKey === lastSearchKey) {
consecutiveDupeCount++;
if (consecutiveDupeCount > MAX_CONSECUTIVE_DUPES) {
await recordWebSearchEvidence(projectRoot, {
query: params.query,
strategy: provider,
scope: params.domain ?? "",
status: "search_loop",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: "Consecutive duplicate search detected",
result: { consecutiveDupeCount },
});
return {
content: [
{
@ -727,6 +786,21 @@ export function registerSearchTool(pi) {
moreResultsAvailable: cached.moreResultsAvailable,
provider,
};
await recordWebSearchEvidence(projectRoot, {
query: params.query,
strategy: provider,
scope: params.domain ?? "",
status: "ok",
hitCount: limited.length,
elapsedMs: Date.now() - startedAt,
result: {
cached: true,
effectiveQuery,
freshness: freshness || "none",
hasSummary: !!summaryText,
results: summarizeWebResults(limited),
},
});
return { content: [{ type: "text", text: content }], details };
}
onUpdate?.({
@ -864,9 +938,38 @@ export function registerSearchTool(pi) {
moreResultsAvailable: searchResult.moreResultsAvailable,
provider,
};
await recordWebSearchEvidence(projectRoot, {
query: params.query,
strategy: provider,
scope: params.domain ?? "",
status: "ok",
hitCount: results.length,
elapsedMs: Date.now() - startedAt,
result: {
cached: false,
effectiveQuery,
freshness: freshness || "none",
hasSummary: !!summaryText,
latencyMs,
results: summarizeWebResults(results),
},
});
return { content: [{ type: "text", text: content }], details };
} catch (error) {
const classified = classifyError(error);
await recordWebSearchEvidence(projectRoot, {
query: params.query,
strategy: provider,
scope: params.domain ?? "",
status: "error",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: classified.message,
result: {
errorKind: classified.kind,
retryAfterMs: classified.retryAfterMs,
},
});
return {
content: [
{ type: "text", text: `Search failed: ${classified.message}` },

View file

@ -1,5 +1,6 @@
// SF2 — Read-only query tools exposing DB state to the LLM via the WAL connection
import { Type } from "@sinclair/typebox";
import { getRetrievalEvidence } from "../sf-db.js";
import { executeMilestoneStatus } from "../tools/workflow-tool-executors.js";
import { ensureDbOpen } from "./dynamic-tools.js";
export function registerQueryTools(pi) {
@ -36,4 +37,59 @@ export function registerQueryTools(pi) {
return executeMilestoneStatus(params);
},
});
pi.registerTool({
name: "sf_retrieval_evidence",
label: "Retrieval Evidence",
description:
"Read recent retrieval provenance from the SF database. Returns source backend, query, scope, freshness, status, and result metadata. " +
"Use this instead of querying .sf/sf.db directly when auditing Sift, codebase_search, Context7, or web-derived context.",
promptSnippet:
"Inspect recent retrieval evidence rows with backend, scope, freshness, and hit counts",
promptGuidelines: [
"Use this to verify whether context came from live source, stale indexed docs, or another retrieval backend before trusting it.",
"Prefer rows with backend=sift and freshness=working-tree/git-head for implementation decisions.",
],
parameters: Type.Object({
limit: Type.Optional(
Type.Number({
description: "Maximum number of retrieval evidence rows to return.",
default: 20,
}),
),
}),
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const dbAvailable = await ensureDbOpen();
if (!dbAvailable) {
return {
content: [
{
type: "text",
text: "Error: SF database is not available. Cannot read retrieval evidence.",
},
],
details: {
operation: "retrieval_evidence",
error: "db_unavailable",
},
};
}
const limit = Math.max(1, Math.min(100, params.limit ?? 20));
const rows = getRetrievalEvidence(limit);
const lines = [`Retrieval evidence: ${rows.length} row(s)`, ""];
for (const row of rows) {
lines.push(
`- #${row.id} ${row.backend}/${row.sourceKind} ${row.status} ` +
`freshness=${row.freshness} scope=${row.scope || "."} hits=${row.hitCount} ` +
`query="${row.query}"`,
);
}
return {
content: [{ type: "text", text: lines.join("\n") }],
details: {
operation: "retrieval_evidence",
rows,
},
};
},
});
}

View file

@ -11,10 +11,10 @@
* - Optional search/tool integrations (Brave, Tavily, Jina, Context7)
*/
import { existsSync } from "node:fs";
import { getEnvApiKey } from "@singularity-forge/pi-ai";
import { AuthStorage } from "@singularity-forge/pi-coding-agent";
import { getAuthPath, PROVIDER_REGISTRY } from "./key-manager.js";
import { loadEffectiveSFPreferences } from "./preferences.js";
import { getConfiguredEnvApiKey } from "./provider-env-auth.js";
import {
couldBeVaultUri,
hasProviderCredentialEnvVar,
@ -141,7 +141,7 @@ function resolveKey(providerId) {
// Check environment variable using the authoritative env var resolution
// (handles multi-var lookups like ANTHROPIC_OAUTH_TOKEN || ANTHROPIC_API_KEY,
// COPILOT_GITHUB_TOKEN || GH_TOKEN || GITHUB_TOKEN, Vertex ADC, Bedrock, etc.)
if (getEnvApiKey(providerId)) {
if (getConfiguredEnvApiKey(providerId)) {
return { found: true, source: "env", backedOff: false };
}
// Check for vault:// URIs in env vars (late-binding resolution)
@ -278,6 +278,7 @@ function checkLlmProviders() {
label,
category: "llm",
status: "ok",
source: lookup.source,
message: `${label} — key present (${lookup.source})`,
required: true,
});

View file

@ -31,6 +31,7 @@
"sf_replan_slice",
"sf_requirement_save",
"sf_requirement_update",
"sf_resume",
"sf_retrieval_evidence",
"sf_save_gate_result",
"sf_self_feedback_resolve",

View file

@ -8,11 +8,7 @@
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import {
getEnvApiKey,
getModels,
getProviders,
} from "@singularity-forge/pi-ai";
import { getModels, getProviders } from "@singularity-forge/pi-ai";
import {
DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
@ -26,6 +22,7 @@ import {
getGlobalSFPreferencesPath,
loadEffectiveSFPreferences,
} from "./preferences.js";
import { getConfiguredEnvApiKey } from "./provider-env-auth.js";
const OPENCODE_FREE_MODEL_IDS = new Set([
"big-pickle",
@ -35,7 +32,6 @@ const OPENCODE_FREE_MODEL_IDS = new Set([
]);
const HIDDEN_MODEL_PROVIDERS = new Set([
"claude-code",
"google",
"google-vertex",
"groq",
"github-copilot",
@ -266,7 +262,7 @@ function resolveAutoBenchmarkPickForUnit(unitType, prefs) {
const candidates = [];
for (const provider of getProviders()) {
if (!isProviderAllowedByLists(provider, allowed, blocked)) continue;
if (!getEnvApiKey(provider)) continue;
if (!getConfiguredEnvApiKey(provider)) continue;
for (const model of getModels(provider)) {
if (
!isProviderModelAllowed(
@ -296,7 +292,12 @@ function resolveAutoBenchmarkPickForUnit(unitType, prefs) {
});
if (!picked) return undefined;
return { primary: picked.primary, fallbacks: picked.fallbacks };
} catch {
} catch (err) {
if (process.env.SF_DEBUG_PREFERENCES_MODELS === "1") {
console.warn(
`preferences-models auto benchmark failed: ${err instanceof Error ? err.stack || err.message : String(err)}`,
);
}
return undefined;
}
}

View file

@ -0,0 +1,73 @@
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { getEnvApiKey } from "@singularity-forge/pi-ai";
import {
getAgentDir,
SettingsManager,
} from "@singularity-forge/pi-coding-agent";
// Providers whose ambient env keys are ignored unless settings explicitly
// enable them: their providerEnvAuth mode defaults to "off" (all others
// default to "auto") when no setting is present.
const GOOGLE_ENV_AUTH_DEFAULT_OFF_PROVIDERS = new Set([
  "google",
  "google-gemini-cli",
]);
/** Best-effort JSON read: a missing or unparsable file degrades to `{}`. */
function readJson(path) {
  try {
    return existsSync(path) ? JSON.parse(readFileSync(path, "utf-8")) : {};
  } catch {
    return {};
  }
}
/**
 * Merge global and project providerEnvAuth settings.
 * Project-level `.sf/settings.json` wins over the global agent settings, and
 * the `providers` map is merged key-by-key so a project can override a single
 * provider without clobbering the rest.
 */
function readProviderEnvAuthSettings(cwd, agentDir) {
  const globalAuth =
    readJson(join(agentDir, "settings.json")).providerEnvAuth ?? {};
  const projectAuth =
    readJson(join(cwd, ".sf", "settings.json")).providerEnvAuth ?? {};
  return {
    ...globalAuth,
    ...projectAuth,
    providers: { ...globalAuth.providers, ...projectAuth.providers },
  };
}
/**
 * Resolve the providerEnvAuth mode for a provider.
 * Prefers the core SettingsManager implementation when this pi-coding-agent
 * build exposes it; otherwise replicates the defaulting rules from the raw
 * settings files (per-provider > default > built-in default).
 */
function getProviderEnvAuthMode(providerId, cwd) {
  const agentDir = getAgentDir();
  const manager = SettingsManager.create(cwd, agentDir);
  if (typeof manager.getProviderEnvAuthMode === "function") {
    return manager.getProviderEnvAuthMode(providerId);
  }
  const merged = readProviderEnvAuthSettings(cwd, agentDir);
  const perProvider = merged.providers?.[providerId];
  if (perProvider != null) return perProvider;
  if (merged.default != null) return merged.default;
  return GOOGLE_ENV_AUTH_DEFAULT_OFF_PROVIDERS.has(providerId)
    ? "off"
    : "auto";
}
/**
 * Look up a provider's API key from the environment.
 * The core resolver runs first; the "google" provider gets one extra fallback
 * pair of env vars that the core lookup does not cover.
 * NOTE(review): the fallback applies only to "google", not "google-gemini-cli"
 * — presumably intentional; confirm against the CLI auth path.
 */
function getProviderEnvKey(providerId) {
  const coreKey = getEnvApiKey(providerId);
  if (coreKey) return coreKey;
  if (providerId !== "google") return undefined;
  return (
    process.env.GEMINI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY
  );
}
/**
 * Resolve a provider's env API key, honoring the providerEnvAuth policy.
 *
 * Purpose: keep SF extension-side provider heuristics aligned with the core
 * providerEnvAuth policy so ambient env keys do not bypass settings.json.
 *
 * Consumer: doctor-providers.js and preferences-models.js when checking whether
 * a provider is available from environment credentials.
 *
 * @returns the env key, or undefined when the provider's mode is "off" or no
 *   key is set.
 */
export function getConfiguredEnvApiKey(providerId, cwd = process.cwd()) {
  const mode = getProviderEnvAuthMode(providerId, cwd);
  return mode === "off" ? undefined : getProviderEnvKey(providerId);
}

View file

@ -0,0 +1,66 @@
/**
* retrieval-evidence.js — DB-backed retrieval provenance helpers.
*
* Purpose: give local code, docs, and web retrieval tools one audit contract so
* agents can distinguish live source evidence from stale or external context.
*
* Consumer: `sift_search`, `codebase_search`, and the Context7/web retrieval bridges.
*/
import { execFileSync } from "node:child_process";
/**
 * Run a git query and return its trimmed stdout.
 * Best-effort probe: any failure (git missing, not a repo, bad cwd, 2s
 * timeout) yields "" instead of throwing.
 */
function readGitValue(projectRoot, args) {
  const options = {
    cwd: projectRoot,
    encoding: "utf-8",
    stdio: ["ignore", "pipe", "ignore"],
    timeout: 2_000,
  };
  try {
    const output = execFileSync("git", args, options);
    return output.trim();
  } catch {
    return "";
  }
}
/**
 * Build best-effort git provenance for a retrieval event.
 * Purpose: label whether a context hit came from clean HEAD, a dirty worktree,
 * or an unknown non-git directory before it is trusted for planning.
 * Consumer: retrieval evidence writers.
 */
export function buildRetrievalProvenance(projectRoot) {
  const head = readGitValue(projectRoot, ["rev-parse", "HEAD"]);
  const branch =
    readGitValue(projectRoot, ["branch", "--show-current"]) ||
    readGitValue(projectRoot, ["rev-parse", "--abbrev-ref", "HEAD"]);
  const dirty = readGitValue(projectRoot, ["status", "--porcelain"]) !== "";
  // Outside a git repo (no HEAD) freshness stays "unknown" even if files exist.
  let freshness = "unknown";
  if (head) freshness = dirty ? "working-tree" : "git-head";
  return {
    gitHead: head || null,
    gitBranch: branch || null,
    worktreeDirty: dirty,
    freshness,
  };
}
/**
 * Record retrieval evidence without making retrieval depend on DB availability.
 * Purpose: preserve provenance when the SF DB is open while letting search
 * tools degrade normally in standalone or early-startup contexts.
 * Consumer: local retrieval tool implementations.
 */
export async function recordRetrievalEvidence(projectRoot, entry) {
  try {
    // sf-db is imported lazily so loading this module never forces the DB open.
    const sfDb = await import("./sf-db.js");
    const row = {
      projectRoot,
      ...buildRetrievalProvenance(projectRoot),
      ...entry,
    };
    sfDb.insertRetrievalEvidence(row);
  } catch {
    // Retrieval evidence is best-effort; search results must remain usable.
  }
}

View file

@ -78,7 +78,7 @@ function openRawDb(path) {
loadProvider();
return new DatabaseSync(path);
}
const SCHEMA_VERSION = 40;
const SCHEMA_VERSION = 41;
function indexExists(db, name) {
return !!db
.prepare(
@ -319,6 +319,39 @@ function ensureSelfFeedbackTables(db) {
"CREATE INDEX IF NOT EXISTS idx_self_feedback_kind ON self_feedback(kind, ts)",
);
}
// Create the retrieval_evidence table and its indexes (idempotent).
// One row per retrieval lookup: backend/source identity, the query and scope,
// git provenance (head/branch/dirty flag), a freshness label, outcome status,
// hit count, timing, and a JSON blob of backend-specific result metadata.
function ensureRetrievalEvidenceTables(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS retrieval_evidence (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      backend TEXT NOT NULL,
      source_kind TEXT NOT NULL DEFAULT 'code',
      query TEXT NOT NULL DEFAULT '',
      strategy TEXT NOT NULL DEFAULT '',
      scope TEXT NOT NULL DEFAULT '',
      project_root TEXT NOT NULL DEFAULT '',
      git_head TEXT DEFAULT NULL,
      git_branch TEXT DEFAULT NULL,
      worktree_dirty INTEGER NOT NULL DEFAULT 0,
      freshness TEXT NOT NULL DEFAULT 'unknown',
      status TEXT NOT NULL DEFAULT 'ok',
      hit_count INTEGER NOT NULL DEFAULT 0,
      elapsed_ms INTEGER NOT NULL DEFAULT 0,
      cache_path TEXT DEFAULT NULL,
      error TEXT DEFAULT NULL,
      result_json TEXT NOT NULL DEFAULT '{}',
      recorded_at TEXT NOT NULL
    )
  `);
  // Indexes match the expected read paths: latest rows per backend, per
  // scope, and per status.
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_backend_recorded ON retrieval_evidence(backend, recorded_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_scope_recorded ON retrieval_evidence(scope, recorded_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_status_recorded ON retrieval_evidence(status, recorded_at DESC)",
  );
}
function ensureSpecSchemaTables(db) {
// Tier 1.3: Spec/Runtime/Evidence schema separation
// Creates 9 normalized tables for milestone, slice, task entities
@ -867,6 +900,7 @@ function initSchema(db, fileBacked) {
`);
ensureSelfFeedbackTables(db);
ensureSolverEvalTables(db);
ensureRetrievalEvidenceTables(db);
db.exec(
"CREATE INDEX IF NOT EXISTS idx_memories_active ON memories(superseded_by)",
);
@ -940,6 +974,7 @@ function initSchema(db, fileBacked) {
ensureHeadlessRunTables(db);
ensureUokMessageTables(db);
ensureSpecSchemaTables(db);
ensureRetrievalEvidenceTables(db);
db.exec(
`CREATE VIEW IF NOT EXISTS active_decisions AS SELECT * FROM decisions WHERE superseded_by IS NULL`,
);
@ -2106,6 +2141,15 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 41) {
ensureRetrievalEvidenceTables(db);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 41,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -6208,6 +6252,73 @@ export function getJudgmentsForUnit(unitIdPrefix, limit = 1000) {
return [];
}
}
// ─── Retrieval Evidence ─────────────────────────────────────────────────────
/**
 * Persist one retrieval lookup together with its source provenance.
 * Purpose: let SF compare live code, semantic, docs, and web context by the same
 * freshness and scope contract before planning or implementation trusts it.
 * Consumer: Sift/codebase search tools and future Context7/web retrieval bridges.
 * @throws SFError(SF_STALE_STATE) when no database is currently open.
 */
export function insertRetrievalEvidence(args) {
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  // Normalize optional fields up front so the INSERT stays declarative.
  const params = {
    ":backend": args.backend,
    ":source_kind": args.sourceKind ?? "code",
    ":query": args.query ?? "",
    ":strategy": args.strategy ?? "",
    ":scope": args.scope ?? "",
    ":project_root": args.projectRoot ?? "",
    ":git_head": args.gitHead ?? null,
    ":git_branch": args.gitBranch ?? null,
    ":worktree_dirty": intBool(args.worktreeDirty),
    ":freshness": args.freshness ?? "unknown",
    ":status": args.status ?? "ok",
    ":hit_count": args.hitCount ?? 0,
    ":elapsed_ms": args.elapsedMs ?? 0,
    ":cache_path": args.cachePath ?? null,
    ":error": args.error ?? null,
    ":result_json": JSON.stringify(args.result ?? {}),
    // Caller may pin the timestamp (tests do); otherwise record "now".
    ":recorded_at": args.recordedAt ?? new Date().toISOString(),
  };
  const statement = currentDb.prepare(`INSERT INTO retrieval_evidence (
backend, source_kind, query, strategy, scope, project_root,
git_head, git_branch, worktree_dirty, freshness, status,
hit_count, elapsed_ms, cache_path, error, result_json, recorded_at
) VALUES (
:backend, :source_kind, :query, :strategy, :scope, :project_root,
:git_head, :git_branch, :worktree_dirty, :freshness, :status,
:hit_count, :elapsed_ms, :cache_path, :error, :result_json, :recorded_at
)`);
  statement.run(params);
}
/**
 * Fetch the most recent retrieval evidence rows, newest first.
 * Purpose: support audits that need to distinguish live source evidence from
 * stale indexed or prose-only context.
 * Consumer: inspect/doctor tooling and tests for retrieval provenance.
 * @param {number} [limit=100] - Maximum number of rows to return.
 * @returns {Array<object>} camelCase rows; `result` holds parsed result_json.
 */
export function getRetrievalEvidence(limit = 100) {
  if (!currentDb) return [];
  const statement = currentDb.prepare(`SELECT
id, backend, source_kind AS sourceKind, query, strategy, scope,
project_root AS projectRoot, git_head AS gitHead,
git_branch AS gitBranch, worktree_dirty AS worktreeDirty,
freshness, status, hit_count AS hitCount, elapsed_ms AS elapsedMs,
cache_path AS cachePath, error, result_json AS resultJson, recorded_at AS recordedAt
FROM retrieval_evidence
ORDER BY recorded_at DESC, id DESC
LIMIT :limit`);
  const toRecord = (row) => ({
    ...row,
    // SQLite stores booleans as 0/1; expose a real boolean to callers.
    worktreeDirty: row.worktreeDirty === 1,
    result: parseJsonObject(row.resultJson, {}),
  });
  return statement.all({ ":limit": limit }).map(toRecord);
}
// ─── Memory Embeddings ───────────────────────────────────────────────────────
export function upsertMemoryEmbedding(args) {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");

View file

@ -0,0 +1,124 @@
/**
 * context7-retrieval-evidence.test.mjs — Context7 provenance coverage.
*
* Purpose: prove external documentation lookups write DB evidence with backend,
* scope, freshness, and result metadata before agents trust indexed docs.
*/
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import registerContext7Extension from "../../context7/index.js";
import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js";
// Snapshots of global state these tests mutate, restored in afterEach:
// temp project roots to delete, original cwd, and the original global fetch.
const tmpRoots = [];
const originalCwd = process.cwd();
const originalFetch = globalThis.fetch;
// Undo per-test global mutations: working directory, the shared DB handle, and
// the patched global fetch; then delete every temp project from makeProject().
afterEach(() => {
  process.chdir(originalCwd);
  closeDatabase();
  globalThis.fetch = originalFetch;
  // splice(0) empties tmpRoots so each root is removed exactly once.
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});
/** Create a throwaway project dir containing .sf/; tracked for cleanup. */
function makeProject() {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-context7-evidence-"));
  tmpRoots.push(projectRoot);
  mkdirSync(join(projectRoot, ".sf"), { recursive: true });
  return projectRoot;
}
/** Register the Context7 extension on a stub host and collect its tools by name. */
function captureContext7Tools() {
  const registered = new Map();
  const stubHost = {
    on() {},
    registerTool(definition) {
      registered.set(definition.name, definition);
    },
  };
  registerContext7Extension(stubHost);
  return registered;
}
test("resolve_library_when_successful_records_retrieval_evidence", async () => {
  // Arrange: temp project with an open .sf database so evidence rows persist.
  const project = makeProject();
  process.chdir(project);
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  // Stub the Context7 search endpoint with one canned library result.
  globalThis.fetch = async (url) => {
    assert.match(String(url), /\/libs\/search/);
    return {
      ok: true,
      json: async () => ({
        results: [
          {
            id: "/websites/example_docs",
            title: "Example Docs",
            trustScore: 9,
            benchmarkScore: 87.5,
          },
        ],
      }),
    };
  };
  const tools = captureContext7Tools();
  // Act: invoke resolve_library the way the agent host would.
  const result = await tools.get("resolve_library").execute("call-1", {
    libraryName: "example-docs-evidence",
    query: "routing",
  });
  assert.equal(result.details.resultCount, 1);
  // Assert: exactly one evidence row with docs provenance and the canned payload.
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "context7");
  assert.equal(rows[0].sourceKind, "docs");
  assert.equal(rows[0].strategy, "library-search");
  assert.equal(rows[0].scope, "example-docs-evidence");
  assert.equal(rows[0].freshness, "external-index");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.deepEqual(rows[0].result.libraries, [
    {
      id: "/websites/example_docs",
      title: "Example Docs",
      trustScore: 9,
      benchmarkScore: 87.5,
    },
  ]);
});
test("get_library_docs_when_successful_records_retrieval_evidence", async () => {
  // Arrange: temp project plus open DB so evidence rows can be written.
  const project = makeProject();
  process.chdir(project);
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  // Stub the Context7 docs endpoint with a small plain-text body.
  globalThis.fetch = async (url) => {
    assert.match(String(url), /\/context/);
    return {
      ok: true,
      text: async () => "Example docs about routing.",
    };
  };
  const tools = captureContext7Tools();
  const result = await tools.get("get_library_docs").execute("call-1", {
    libraryId: "/websites/example_docs",
    query: "routing",
    tokens: 500,
  });
  assert.equal(result.details.charCount, "Example docs about routing.".length);
  // Assert: one evidence row scoped to the library id, flagged external-index,
  // echoing the requested token budget and fetched char count.
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "context7");
  assert.equal(rows[0].sourceKind, "docs");
  assert.equal(rows[0].strategy, "docs-fetch");
  assert.equal(rows[0].scope, "/websites/example_docs");
  assert.equal(rows[0].freshness, "external-index");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.equal(rows[0].result.tokens, 500);
  assert.equal(rows[0].result.charCount, "Example docs about routing.".length);
});

View file

@ -24,6 +24,7 @@ function makePreferencesProject(globalPreferences) {
const home = join(root, "home");
const project = join(root, "project");
mkdirSync(home, { recursive: true });
mkdirSync(join(home, ".sf", "agent"), { recursive: true });
mkdirSync(join(project, ".sf"), { recursive: true });
writeFileSync(join(home, "preferences.md"), globalPreferences, "utf-8");
writeFileSync(
@ -32,6 +33,7 @@ function makePreferencesProject(globalPreferences) {
"utf-8",
);
process.env.SF_HOME = home;
process.env.HOME = home;
process.chdir(project);
return project;
}
@ -91,4 +93,59 @@ describe("doctor provider checks", () => {
assert.equal(telegram?.status, "unconfigured");
assert.equal(telegram?.required, false);
});
// Env-key auth is opt-in: with no providerEnvAuth override, a GEMINI_API_KEY in
// the environment must NOT satisfy a required google model route.
test("runProviderChecks_when_google_env_auth_is_default_off_treats_google_as_missing_required_route", () => {
  makePreferencesProject(
    [
      "---",
      "version: 1",
      "models:",
      " planning: google/gemini-2.5-pro",
      "---",
      "",
    ].join("\n"),
  );
  process.env.GEMINI_API_KEY = "test-google-key";
  const results = runProviderChecks();
  const google = results.find((result) => result.name === "google");
  // Default-off env auth leaves the configured google route unsatisfied.
  assert.equal(google?.status, "error");
});
// With providerEnvAuth.providers.google = "on" in project settings, the env
// GEMINI_API_KEY is accepted and reported with source "env".
test("runProviderChecks_when_google_env_auth_is_enabled_accepts_google_env_key", () => {
  const project = makePreferencesProject(
    [
      "---",
      "version: 1",
      "models:",
      " planning: google/gemini-2.5-pro",
      "---",
      "",
    ].join("\n"),
  );
  mkdirSync(join(project, ".sf"), { recursive: true });
  writeFileSync(
    join(project, ".sf", "settings.json"),
    JSON.stringify(
      {
        providerEnvAuth: {
          providers: {
            google: "on",
          },
        },
      },
      null,
      2,
    ),
    "utf-8",
  );
  process.env.GEMINI_API_KEY = "test-google-key";
  const results = runProviderChecks();
  const google = results.find((result) => result.name === "google");
  assert.equal(google?.status, "ok");
  assert.equal(google?.source, "env");
});
});

View file

@ -0,0 +1,93 @@
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import { resolveModelWithFallbacksForUnit } from "../preferences-models.js";
import { getConfiguredEnvApiKey } from "../provider-env-auth.js";
// Snapshots restored in afterEach: cwd, the full env object, and temp dirs.
const originalCwd = process.cwd();
const originalEnv = { ...process.env };
const tmpDirs = [];
// Restore the working directory and a fresh copy of the original environment,
// then delete the temp roots newest-first (same order as a pop loop).
afterEach(() => {
  process.chdir(originalCwd);
  process.env = { ...originalEnv };
  for (const dir of tmpDirs.splice(0).reverse()) {
    rmSync(dir, { recursive: true, force: true });
  }
});
/**
 * Build a temp HOME + project pair, write the project PREFERENCES.md (and
 * optional settings.json), point HOME at the temp home, and chdir into the
 * project. Returns the project directory path.
 */
function makePreferencesProject(projectPreferences, projectSettings) {
  const root = mkdtempSync(join(tmpdir(), "sf-preferences-models-"));
  tmpDirs.push(root);
  const homeDir = join(root, "home");
  const projectDir = join(root, "project");
  mkdirSync(join(homeDir, ".sf", "agent"), { recursive: true });
  mkdirSync(join(projectDir, ".sf"), { recursive: true });
  writeFileSync(join(projectDir, ".sf", "PREFERENCES.md"), projectPreferences, "utf-8");
  if (projectSettings) {
    const settingsJson = JSON.stringify(projectSettings, null, 2);
    writeFileSync(join(projectDir, ".sf", "settings.json"), settingsJson, "utf-8");
  }
  process.env.HOME = homeDir;
  process.chdir(projectDir);
  return projectDir;
}
describe("preferences model resolution", () => {
  // Default-off env auth: google is an allowed provider but has no usable key
  // route, so auto-benchmark candidate resolution yields nothing.
  test("resolveModelWithFallbacksForUnit_when_google_env_auth_is_default_off_skips_google_auto_benchmark_candidates", () => {
    makePreferencesProject(
      [
        "---",
        "version: 1",
        "allowed_providers:",
        " - google",
        "models: {}",
        "---",
        "",
      ].join("\n"),
    );
    process.env.GEMINI_API_KEY = "test-google-key";
    const result = resolveModelWithFallbacksForUnit("plan-milestone");
    assert.equal(result, undefined);
  });
  // Enabling env auth via settings.json makes the env key count, so candidates
  // resolve to a concrete primary model id.
  test("resolveModelWithFallbacksForUnit_when_google_env_auth_is_enabled_uses_google_auto_benchmark_candidates", () => {
    makePreferencesProject(
      [
        "---",
        "version: 1",
        "allowed_providers:",
        " - google",
        "models: {}",
        "---",
        "",
      ].join("\n"),
      {
        providerEnvAuth: {
          providers: {
            google: "on",
          },
        },
      },
    );
    process.env.GEMINI_API_KEY = "test-google-key";
    // Sanity check: the env key is visible through the configured-env-auth path.
    assert.equal(getConfiguredEnvApiKey("google"), "test-google-key");
    const result = resolveModelWithFallbacksForUnit("plan-milestone");
    assert.ok(result);
    assert.equal(typeof result.primary, "string");
    assert.ok(result.primary.length > 0);
  });
});

View file

@ -14,9 +14,11 @@ import {
closeDatabase,
getDatabase,
getJudgmentsForUnit,
getRetrievalEvidence,
getScheduleEntries,
insertGateRun,
insertJudgment,
insertRetrievalEvidence,
insertScheduleEntry,
openDatabase,
} from "../sf-db.js";
@ -203,7 +205,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 40);
assert.equal(version.version, 41);
const taskSpec = db
.prepare(
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",
@ -343,3 +345,37 @@ test("openDatabase_judgments_table_round_trip", () => {
assert.equal(t01.length, 1);
assert.equal(t01[0].confidence, "high");
});
// Round-trip: every insertRetrievalEvidence field comes back camelCased, with
// worktree_dirty surfaced as a boolean and result_json parsed into an object.
test("openDatabase_retrieval_evidence_table_round_trip", () => {
  assert.equal(openDatabase(":memory:"), true);
  insertRetrievalEvidence({
    backend: "sift",
    sourceKind: "code",
    query: "approval policy",
    strategy: "bm25",
    scope: "src",
    projectRoot: "/repo",
    gitHead: "abc123",
    gitBranch: "main",
    worktreeDirty: true,
    freshness: "working-tree",
    status: "ok",
    hitCount: 1,
    elapsedMs: 42,
    cachePath: "/repo/.sf/runtime/sift/search-cache",
    result: { hits: [{ path: "src/index.ts", score: 0.9 }] },
    recordedAt: "2026-05-07T00:00:00.000Z",
  });
  const rows = getRetrievalEvidence(10);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "sift");
  assert.equal(rows[0].sourceKind, "code");
  assert.equal(rows[0].query, "approval policy");
  assert.equal(rows[0].scope, "src");
  // Stored as INTEGER 0/1; reader must convert back to a boolean.
  assert.equal(rows[0].worktreeDirty, true);
  assert.equal(rows[0].freshness, "working-tree");
  assert.deepEqual(rows[0].result, {
    hits: [{ path: "src/index.ts", score: 0.9 }],
  });
});

View file

@ -0,0 +1,168 @@
/**
 * sift-retrieval-evidence.test.mjs — Sift retrieval provenance coverage.
*
* Purpose: prove live code searches record DB evidence with backend, scope,
* freshness, and result metadata so context tools can be audited consistently.
*/
import assert from "node:assert/strict";
import {
chmodSync,
mkdirSync,
mkdtempSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import registerSubagentExtension from "../../subagent/index.js";
import { registerQueryTools } from "../bootstrap/query-tools.js";
import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js";
import { registerSiftSearchTool } from "../tools/sift-search-tool.js";
// Temp dirs to clean up, plus snapshots of cwd and the SIFT_PATH override so
// afterEach can restore both (including the "unset" case for SIFT_PATH).
const tmpRoots = [];
const originalCwd = process.cwd();
const originalSiftPath = process.env.SIFT_PATH;
// Undo per-test global mutations: cwd, DB handle, SIFT_PATH, and temp dirs.
afterEach(() => {
  process.chdir(originalCwd);
  closeDatabase();
  // SIFT_PATH may have been unset before the test; distinguish "unset" from "".
  if (originalSiftPath === undefined) {
    delete process.env.SIFT_PATH;
  } else {
    process.env.SIFT_PATH = originalSiftPath;
  }
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});
/** Create a temp project with .sf/ and one searchable source file under src/. */
function makeProject() {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-sift-evidence-"));
  tmpRoots.push(projectRoot);
  mkdirSync(join(projectRoot, ".sf"), { recursive: true });
  mkdirSync(join(projectRoot, "src"), { recursive: true });
  writeFileSync(join(projectRoot, "src", "index.js"), "export const value = 1;\n");
  return projectRoot;
}
/** Write an executable stub `sift` binary that prints one canned JSON hit. */
function makeFakeSift() {
  const binDir = mkdtempSync(join(tmpdir(), "sf-sift-bin-"));
  tmpRoots.push(binDir);
  const binPath = join(binDir, "sift");
  const script = `#!/bin/sh
printf '{"query":"approval policy","strategy":"bm25","hits":[{"path":"src/index.js","score":0.91,"content":"approval policy contract","line_start":1,"line_end":1}]}'`;
  writeFileSync(binPath, script);
  chmodSync(binPath, 0o755);
  return binPath;
}
/** Register sift_search against a stub host and return its tool definition. */
function captureTool() {
  let captured = null;
  registerSiftSearchTool({
    registerTool(definition) {
      captured = definition;
    },
  });
  assert.ok(captured, "sift_search tool should register");
  return captured;
}
/** Register the SF query tools and return the named one, asserting it exists. */
function captureQueryTool(name) {
  const registered = new Map();
  registerQueryTools({
    registerTool(definition) {
      registered.set(definition.name, definition);
    },
  });
  const found = registered.get(name);
  assert.ok(found, `${name} tool should register`);
  return found;
}
/** Register the subagent extension with stub host hooks and return one tool. */
function captureSubagentTool(name) {
  const registered = new Map();
  registerSubagentExtension({
    on() {},
    registerCommand() {},
    registerTool(definition) {
      registered.set(definition.name, definition);
    },
  });
  const found = registered.get(name);
  assert.ok(found, `${name} tool should register`);
  return found;
}
test("sift_search_when_successful_records_retrieval_evidence", async () => {
  // Arrange: temp project, fake sift binary on SIFT_PATH, open DB.
  const project = makeProject();
  process.chdir(project);
  process.env.SIFT_PATH = makeFakeSift();
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  const tool = captureTool();
  const result = await tool.execute("call-1", {
    query: "approval policy",
    path: "src",
    strategy: "bm25",
    limit: 5,
  });
  assert.equal(result.isError, undefined);
  assert.equal(result.details.hitCount, 1);
  // Assert: one sift-backed evidence row carrying scope, cache path, and the
  // structured hits from the fake binary's JSON output.
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "sift");
  assert.equal(rows[0].sourceKind, "code");
  assert.equal(rows[0].query, "approval policy");
  assert.equal(rows[0].strategy, "bm25");
  assert.equal(rows[0].scope, "src");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.equal(rows[0].projectRoot, project);
  assert.match(rows[0].cachePath, /\.sf\/runtime\/sift\/search-cache$/);
  assert.deepEqual(rows[0].result.hits, [
    {
      path: "src/index.js",
      score: 0.91,
      lineStart: 1,
      lineEnd: 1,
    },
  ]);
  // The sf_retrieval_evidence query tool must surface the same row.
  const queryTool = captureQueryTool("sf_retrieval_evidence");
  const queryResult = await queryTool.execute("call-2", { limit: 1 });
  assert.match(queryResult.content[0].text, /Retrieval evidence: 1 row/);
  assert.equal(queryResult.details.rows[0].backend, "sift");
  assert.equal(queryResult.details.rows[0].query, "approval policy");
});
// codebase_search shells out to the same fake sift; evidence is recorded under
// its own backend name with an output preview rather than structured hits.
test("codebase_search_when_successful_records_retrieval_evidence", async () => {
  const project = makeProject();
  process.chdir(project);
  process.env.SIFT_PATH = makeFakeSift();
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  const tool = captureSubagentTool("codebase_search");
  const result = await tool.execute("call-1", {
    query: "approval policy",
    scope: "src",
    strategy: "path-hybrid",
    timeoutMs: 10_000,
  });
  assert.equal(result.details.operation, "codebase_search");
  assert.equal(result.details.exitCode, 0);
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "codebase_search");
  assert.equal(rows[0].sourceKind, "code");
  assert.equal(rows[0].query, "approval policy");
  assert.equal(rows[0].strategy, "path-hybrid");
  assert.equal(rows[0].scope, "src");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.match(rows[0].result.outputPreview, /approval policy contract/);
});

View file

@ -0,0 +1,136 @@
/**
 * web-search-retrieval-evidence.test.mjs — web search provenance coverage.
*
* Purpose: prove current/external fact lookups write DB evidence before agents
* use web results as planning input.
*/
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import {
registerSearchTool,
resetSearchLoopGuardState,
} from "../../search-the-web/tool-search.js";
import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js";
// Snapshots of globals these tests mutate, restored in afterEach.
const tmpRoots = [];
const originalCwd = process.cwd();
const originalFetch = globalThis.fetch;
// Capture every provider key up front; "unset" must round-trip as unset.
const originalBraveKey = process.env.BRAVE_API_KEY;
const originalTavilyKey = process.env.TAVILY_API_KEY;
const originalMiniMaxCodePlanKey = process.env.MINIMAX_CODE_PLAN_KEY;
const originalMiniMaxCodingKey = process.env.MINIMAX_CODING_API_KEY;
const originalMiniMaxKey = process.env.MINIMAX_API_KEY;
const originalSerperKey = process.env.SERPER_API_KEY;
const originalExaKey = process.env.EXA_API_KEY;
const originalOllamaKey = process.env.OLLAMA_API_KEY;
// Restore every piece of global state the web-search tests mutate — cwd, the
// shared DB handle, the patched fetch, all provider API keys, and the search
// loop guard — then delete all temp projects.
afterEach(() => {
  process.chdir(originalCwd);
  closeDatabase();
  globalThis.fetch = originalFetch;
  // Consistency fix: BRAVE_API_KEY previously used a hand-rolled if/else while
  // every other key went through restoreEnv; route them all through the helper.
  restoreEnv("BRAVE_API_KEY", originalBraveKey);
  restoreEnv("TAVILY_API_KEY", originalTavilyKey);
  restoreEnv("MINIMAX_CODE_PLAN_KEY", originalMiniMaxCodePlanKey);
  restoreEnv("MINIMAX_CODING_API_KEY", originalMiniMaxCodingKey);
  restoreEnv("MINIMAX_API_KEY", originalMiniMaxKey);
  restoreEnv("SERPER_API_KEY", originalSerperKey);
  restoreEnv("EXA_API_KEY", originalExaKey);
  restoreEnv("OLLAMA_API_KEY", originalOllamaKey);
  resetSearchLoopGuardState();
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});
/** Put process.env[key] back to its captured value; undefined means unset. */
function restoreEnv(key, value) {
  if (value === undefined) {
    delete process.env[key];
    return;
  }
  process.env[key] = value;
}
/** Create a throwaway project dir containing .sf/; tracked for cleanup. */
function makeProject() {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-web-search-evidence-"));
  tmpRoots.push(projectRoot);
  mkdirSync(join(projectRoot, ".sf"), { recursive: true });
  return projectRoot;
}
/** Register search-the-web on a stub host and return its tool definition. */
function captureSearchTool() {
  let captured = null;
  registerSearchTool({
    registerTool(definition) {
      captured = definition;
    },
    writeTempFile: async () => "/tmp/not-used",
  });
  assert.ok(captured, "search-the-web tool should register");
  return captured;
}
test("search_the_web_when_successful_records_retrieval_evidence", async () => {
  // Arrange: temp project + DB; make Brave the only configured provider by
  // blanking every competing provider key.
  const project = makeProject();
  process.chdir(project);
  process.env.BRAVE_API_KEY = "test-brave-key";
  process.env.TAVILY_API_KEY = "";
  process.env.MINIMAX_CODE_PLAN_KEY = "";
  process.env.MINIMAX_CODING_API_KEY = "";
  process.env.MINIMAX_API_KEY = "";
  process.env.SERPER_API_KEY = "";
  process.env.EXA_API_KEY = "";
  process.env.OLLAMA_API_KEY = "";
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  // Stub the Brave endpoint with one canned web result.
  globalThis.fetch = async (url) => {
    assert.match(String(url), /api\.search\.brave\.com/);
    return {
      ok: true,
      headers: { get: () => null },
      json: async () => ({
        web: {
          results: [
            {
              title: "Example Result",
              url: "https://example.com/result",
              description: "A result for testing.",
            },
          ],
        },
        query: {
          original: "example query",
          altered: "example query",
        },
      }),
    };
  };
  const tool = captureSearchTool();
  const result = await tool.execute("call-1", {
    query: "example query",
    count: 1,
  });
  assert.equal(result.details.provider, "brave");
  assert.equal(result.details.count, 1);
  // Assert: web-sourced evidence flagged external-live, with the result trimmed
  // to title/url pairs (description dropped).
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "search-the-web");
  assert.equal(rows[0].sourceKind, "web");
  assert.equal(rows[0].strategy, "brave");
  assert.equal(rows[0].freshness, "external-live");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.deepEqual(rows[0].result.results, [
    {
      title: "Example Result",
      url: "https://example.com/result",
    },
  ]);
});

View file

@ -17,6 +17,7 @@ import {
resolveSiftBinary,
resolveSiftSearchScope,
} from "../code-intelligence.js";
import { recordRetrievalEvidence } from "../retrieval-evidence.js";
const _KNOWN_STRATEGIES = [
"hybrid",
@ -263,20 +264,45 @@ export function registerSiftSearchTool(pi) {
const elapsedMs = Date.now() - startedAt;
const result = parseSiftOutput(stdout, stderr);
const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);
// Telemetry: log query outcomes for tuning
const { logInfo } = await import("../workflow-logger.js");
logInfo("sift_search", {
await recordRetrievalEvidence(projectRoot, {
backend: "sift",
sourceKind: "code",
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
agent: params.agent ?? false,
path: scope,
scope,
status: "ok",
hitCount: result.hits.length,
elapsedMs,
binary: binaryPath,
searchCache: runtimeDirs.searchCache,
cachePath: runtimeDirs.searchCache,
result: {
hits: result.hits.map((hit) => ({
path: hit.path,
score: hit.score,
lineStart: hit.lineStart,
lineEnd: hit.lineEnd,
})),
agent: params.agent ?? false,
agentMode: params.agentMode ?? null,
plannerStrategy: params.plannerStrategy ?? null,
},
});
try {
const { debugLog } = await import("../debug-logger.js");
debugLog("sift_search", {
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
agent: params.agent ?? false,
path: scope,
hitCount: result.hits.length,
elapsedMs,
binary: binaryPath,
searchCache: runtimeDirs.searchCache,
});
} catch {
// Telemetry must not change search semantics.
}
const lines = [
`Sift search: "${params.query}"`,
`Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? ` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`,
@ -319,6 +345,17 @@ export function registerSiftSearchTool(pi) {
} catch (err) {
const elapsedMs = Date.now() - startedAt;
const message = err instanceof Error ? err.message : String(err);
await recordRetrievalEvidence(projectRoot, {
backend: "sift",
sourceKind: "code",
query: params.query,
strategy: params.strategy ?? DEFAULT_STRATEGY,
scope,
status: "error",
hitCount: 0,
elapsedMs,
error: message,
});
return {
content: [
{

View file

@ -29,6 +29,7 @@ import {
resolveSiftSearchScope,
} from "../sf/code-intelligence.js";
import { loadEffectiveSFPreferences } from "../sf/preferences.js";
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";
import { formatTokenCount } from "../shared/mod.js";
import { getCurrentPhase } from "../shared/sf-phase-state.js";
import { discoverAgents } from "./agents.js";
@ -2281,6 +2282,7 @@ export default function (pi) {
const scope = resolveSiftSearchScope(projectRoot, params.scope);
const strategy = params.strategy ?? "page-index-hybrid";
const query = params.query;
const startedAt = Date.now();
const timeoutMs =
typeof params.timeoutMs === "number" &&
Number.isFinite(params.timeoutMs)
@ -2288,6 +2290,17 @@ export default function (pi) {
: CODEBASE_SEARCH_TIMEOUT_MS;
const siftBin = resolveSiftBinary();
if (!siftBin) {
await recordRetrievalEvidence(projectRoot, {
backend: "codebase_search",
sourceKind: "code",
query,
strategy,
scope,
status: "error",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
error: "sift binary not found",
});
return {
content: [
{
@ -2367,6 +2380,22 @@ export default function (pi) {
const text = timedOut
? `Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.`
: "Code search aborted.";
await recordRetrievalEvidence(projectRoot, {
backend: "codebase_search",
sourceKind: "code",
query,
strategy,
scope,
status: timedOut ? "timeout" : "aborted",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
cachePath: runtimeDirs.searchCache,
error: text,
result: {
siftBin,
timeoutMs,
},
});
return {
content: [
{
@ -2396,6 +2425,23 @@ export default function (pi) {
: err
? `\n\nsift stderr: ${err.slice(0, 500)}`
: "";
await recordRetrievalEvidence(projectRoot, {
backend: "codebase_search",
sourceKind: "code",
query,
strategy,
scope,
status: "error",
hitCount: 0,
elapsedMs: Date.now() - startedAt,
cachePath: runtimeDirs.searchCache,
error: err || `exit ${exitCode}`,
result: {
siftBin,
exitCode,
timeoutMs,
},
});
return {
content: [
{
@ -2415,6 +2461,24 @@ export default function (pi) {
},
};
}
await recordRetrievalEvidence(projectRoot, {
backend: "codebase_search",
sourceKind: "code",
query,
strategy,
scope,
status: exitCode === 0 ? "ok" : "partial",
hitCount: out.trim() ? 1 : 0,
elapsedMs: Date.now() - startedAt,
cachePath: runtimeDirs.searchCache,
error: err || null,
result: {
siftBin,
exitCode,
timeoutMs,
outputPreview: out.slice(0, 2_000),
},
});
return {
content: [
{