feat: record retrieval evidence across context tools
Parent: 05f185256c
Commit: b0fce94f9e
33 changed files with 1661 additions and 224 deletions
@@ -1,14 +1,16 @@
# CLI Agent Code Survey — 2026-05-07
# SF + ACE Full-Stack Reference Survey — 2026-05-07

This record compares the local coding-agent checkouts under `/home/mhugo/code/`
against Forge. It is planning evidence, not an instruction to copy another
product's architecture.
This record compares local coding-agent, orchestration, retrieval, model, and
platform-engineering references under `/home/mhugo/code/` plus selected indexed
public references against the intended SF+ACE full-stack flow. It is planning
evidence, not an instruction to copy another product's architecture.

## Product Boundary

Forge remains the product, and UOK remains the internal execution kernel.
External CLIs are reference implementations used to sharpen Forge, not
destination architectures.
Forge remains the local product/runtime surface, ACE remains the higher-level
workflow/control-plane layer, and UOK remains the internal execution safety
kernel. External systems are reference implementations used to sharpen the
unified SF+ACE flow, not destination architectures.

Hard boundary: Forge must stay an MCP client only. Do not add, restore, or plan
an SF MCP server. External control belongs in daemon, RPC, and headless

@@ -38,10 +40,50 @@ Additional coder references:
- `open-codex`
- `letta-code`
- `neovate-code`
- `symphony`
- `singularity/machine` (`codemachine`)

Indexed-only references to include in future passes:

- `kimi-cli` / Kimi Code
- Spec Kit
- upstream CodeMachine CLI (`moazbuilds/CodeMachine-CLI`)

The local `claude-code` checkout is a leaked-source/sourcemap research mirror,
not a clean upstream dependency. Treat it as ergonomics evidence only.

## SF + ACE Full-Stack Reference Map

The long-term target is a unified SF+ACE autonomous software flow, not a
collection of unrelated coding assistants. Compare each repo at the layer where
it is strongest.

| Repo / Tool | Full-Stack Layer | Pattern To Study | Evidence Mode | Safe `sift` Scope |
|---|---|---|---|---|
| `singularity-forge` | Local product/runtime | UOK, DB-first state, CLI/TUI/headless, extension tools, MCP-client-only guardrails | local source + `sift` | `docs/`, `src/resources/extensions/sf/`, `packages/*/src/`, tests |
| `ace-coder` | Workflow/control plane | HTDAG/YAML workflow DAGs, reviewers, quality gates, deployment governance, multi-repo memory | local source + `sift` only | `AGENTS.md`, `CLAUDE.md`, `docs/`, `.agents/skills/`, `python/ai_dev/` first-party modules |
| `symphony` | Work orchestration | Linear polling, isolated per-issue workspaces, `WORKFLOW.md`, Codex app-server, retries, PR review/landing | local source + Context7 `/openai/symphony` | `README.md`, `SPEC.md`, `elixir/WORKFLOW.md`, `elixir/AGENTS.md`, `.codex/skills/` |
| `codemachine` | Multi-agent workflow engine | Engine matrix, SmartRouter, spec-to-code workflow templates, feature flags, tool health | local fork/source + web upstream | `README.md`, `docs/architecture/`, `templates/workflows/`, `prompts/agents/`, `prompts/moderator/` |
| Amplication | Platform/golden paths | Live templates, service catalog, plugin codegen, generated service lifecycle, compliance/drift | web/GitHub; clone before local planning | `docs/`, `packages/*/src/`, plugin/codegen packages if cloned |
| Spec Kit | Spec-driven artifacts | Constitution, scenarios, FR/SC IDs, spec -> plan -> tasks -> analyze -> implement | Context7 `/github/spec-kit` | templates/docs/spec workflows if cloned |
| `plandex` | Large-task implementation | Cumulative diff sandbox, plan versioning, context loading, apply/debug loop | local source + Context7 | `README.md`, `app/cli/lib/`, `app/server/db/`, first-party docs |
| `aider` | Edit loop/context map | Repo-map ranking, edit formats, lint/test repair, benchmark metadata | local source + Context7 | `aider/`, `benchmark/`, `tests/`, docs; avoid generated website data unless needed |
| `Agentless` | Bug repair/evals | Localization -> repair -> patch validation, reproduction tests, reranking | local source | `agentless/fl/`, `agentless/repair/`, `agentless/test/`, benchmark docs |
| SWE-agent/OpenHands | Bug repair/runtime research | issue-to-patch loops, sandbox/runtime harnesses, SWE-bench evaluation | Context7/web or local clone if added | source/docs/evals only when cloned |
| `codex` | Execution substrate | Sandbox profiles, approval policy, app-server protocol, typed events, AGENTS scope | local source + Context7 `/openai/codex` | `docs/`, `codex-rs/protocol/src/`, `codex-rs/exec/src/`, `codex-rs/linux-sandbox/`; avoid `vendor/` |
| `claude-code` | UX reference | Permissions, commands, plugins, MCP client UX, subagent UX | local source only; leaked mirror caveat | `src/commands/`, `src/services/mcp/`, `src/tools/`, `src/components/` |
| `qwen-code` | Terminal workflow | trusted folders, subagent fork design, terminal-capture tests, provider config | local source + Context7 | `docs/`, `packages/*/src/`, `integration-tests/terminal-capture/` |
| Kimi Code | Model-specific coding agent | long-context coding, Kimi CLI/IDE flow, model-plan comparison | Context7 `/moonshotai/kimi-cli` | docs/source if cloned |
| CodeGeeX2 | Model capability | multilingual code model, HumanEval-X/DS1000, local deployment/quantization | web/GitHub | benchmark/evaluation/docs if cloned |
| `gemini-cli` | Provider CLI/testing | release channels, generated schemas/docs, eval promotion, perf/memory tests | local source + Context7 if needed | `docs/`, `evals/`, `perf-tests/`, `memory-tests/`, `packages/*/src/` |
| `opencode` | Mode/schema boundary | plan/build modes, client/server, project-local commands/tools, canonical schema | local source + Context7 | `README.md`, `.opencode/`, `specs/`, `packages/opencode/specs/`, `packages/opencode/src/` |
| `crush` | Local runtime/TUI | SQLite/sqlc, hooks, permissions, LSP, MCP client status, Bubble Tea UI | local source | `internal/db/`, `internal/hooks/`, `internal/permission/`, `internal/agent/tools/`, `internal/ui/` |
| `goose` | Desktop/CLI/API agent | diagnostics, API embedding, provider/extension breadth, MCP client lifecycle | local source | `crates/`, `documentation/`, `ui/desktop/`; do not copy server posture |
| `letta-code` | Long-lived memory | persistent agent memory, approval recovery, skills, channel/remote UX | local source | `src/agent/`, `src/permissions/`, `src/cli/`, `src/tests/` |
| `OpenAgents` | Full-stack multi-agent platform | backend/frontend/agent split, one-agent-one-folder, plugin/data/web agents, adapters | web/GitHub; clone before local planning | `backend/`, `frontend/`, `real_agents/` if cloned |
| Claude Context / Context+ | Code context retrieval | vector-backed semantic code search, MCP-client integration, context cost reduction | Context7/web | code search/indexing packages if cloned |
| `amazon-q-developer-cli` | Rust auth/security | auth, security, workspace patterns, Rust CLI lessons | local source; lower priority | `crates/chat-cli/`, `crates/agent/`, docs |

## Comparison Matrix

| Reference | Strongest Fit For Forge | Borrow | Avoid |

@@ -62,6 +104,10 @@ not a clean upstream dependency. Treat it as ergonomics evidence only.
| `neovate-code` | Design-doc and terminal UX iteration | Small design records, queued-message designs, subagent design notes, command/terminal UX records | Pulling in provider-specific branding or immature UX churn |
| `amazon-q-developer-cli` | Rust auth/security reference | Auth/security/workspace patterns and Rust CLI lessons where applicable | Product direction; local README says the open source project is no longer actively maintained |
| `open-codex` | Older/forked approval-mode comparison | Approval-mode vocabulary and provider abstraction history | Fork-specific Chat Completions direction as a primary architecture |
| `symphony` | Work orchestration above individual agents | Issue-tracker polling, per-issue isolated workspaces, repo-owned `WORKFLOW.md`, Codex app-server lifecycle, retries, operator state, CI/PR review and landing loops | High-trust unattended defaults without Forge's UOK gates and DB-first runtime evidence |
| `codemachine` | Multi-agent spec-to-code orchestration | Engine matrix, SmartRouter routing, heterogeneous agents, spec-to-code templates, feature flags, tool health, local workflow examples, upstream repeatable long-running workflow model | Optional MCP-server/tooling posture and Bun-specific implementation assumptions |
| Kimi Code | Long-context model-specific coding agent | Kimi CLI/IDE workflow, long-context coding, subagent-oriented terminal automation, model-plan comparison | Treating provider-specific subscription/API behavior as a Forge architecture |
| Spec Kit | Spec-driven development workflow | Constitution, prioritized user scenarios, acceptance criteria, functional requirements, measurable success criteria, spec -> plan -> tasks -> analyze -> implement loop | Replacing Forge PDD/UOK with a generic spec template instead of mapping useful pieces into PDD fields |

## Forge Already Has

@@ -136,6 +182,20 @@ surfaces instead of adding parallel state systems.
    - Stop rule: do not implement any SF MCP server, MCP worker backend, or
      bundled/re-exported MCP server.

10. **Work orchestration above single agent sessions**
    - Use OpenAI Symphony and CodeMachine as references.
    - Target Forge surfaces: durable queue/roadmap dispatch, isolated working
      directories, issue/task lifecycle state, retry/backoff, per-run
      observability, proof-of-work handoff, and CI/PR review/landing loops.
    - Stop rule: orchestration must feed UOK and DB-backed state instead of
      bypassing Forge's safety gates.

11. **Spec-driven artifact pipeline**
    - Use Spec Kit and CodeMachine as references.
    - Target Forge surfaces: convert intent into PDD fields, prioritized slices,
      acceptance criteria, functional requirements, measurable success criteria,
      task generation, and consistency analysis before implementation.

## Priority Order

P0:

@@ -155,12 +215,16 @@ P1:
- Add cumulative diff review and evidence metadata.
- Expand UOK evals with Agentless-style localization/repair/validation cases.
- Add MCP client state/status/config hardening without adding any MCP server.
- Add durable orchestration contracts for issue/task queues, isolated workspaces,
  retry policy, proof-of-work, and review/landing loops.

P2:

- Improve terminal command discovery and permission UX.
- Generate settings/environment docs from typed schemas.
- Compare memory lifecycle/recovery against Letta and ACE.
- Map Spec Kit scenario/requirement/success-criteria templates into Forge PDD
  fields without replacing PDD.

## Evidence Pointers

@@ -201,6 +265,10 @@ The follow-up subagent pass inspected these concrete local paths:
- `ace-coder/docs/MCP_SERVER.md`,
  `ace-coder/docs/plans/2026-04-05-mcp-daemon-refactor.md`,
  `ace-coder/python/ai_dev/mcp/`.
- `symphony/README.md`, `symphony/SPEC.md`, `symphony/elixir/WORKFLOW.md`,
  `symphony/elixir/AGENTS.md`, and `.codex/skills/land/SKILL.md`.
- `singularity/machine/README.md`, `package.json`, `templates/workflows/`,
  `docs/architecture/engine-matrix.md`, and `docs/OPENAI_SPECS_DOWNLOAD.md`.

## Context7 Cross-Check

@@ -226,14 +294,58 @@ snapshot available on this machine.
  `/websites/qwenlm_github_io_qwen-code-docs`, and
  `/websites/qwenlm_github_io_qwen-code-docs_en`.
- OpenCode: `/anomalyco/opencode`.
- OpenAI Symphony: `/openai/symphony`.
- Kimi Code: `/moonshotai/kimi-cli`,
  `/websites/moonshotai_github_io_kimi-cli_en`, and `/websites/kimi_code`.
- Spec Kit: `/github/spec-kit` and `/websites/github_github_io_spec-kit`.
- Upstream CodeMachine CLI did not resolve by name in Context7 during this
  pass, but GitHub confirms `https://github.com/moazbuilds/CodeMachine-CLI`
  as the public upstream-style repo for CodeMachine CLI. The local checkout
  inspected is `https://github.com/singularity-ng/machine.git`, so treat it as
  local fork/mirror evidence rather than exact upstream state.

## Local Sift Cross-Check

ACE is private/local and should not be treated as Context7-indexed. Use `sift`
for ACE and Forge when checking private or machine-local architecture.

For dependency hygiene, do not run broad `sift search` over repo roots that may
contain vendored dependencies, package caches, build output, or generated blobs.
This `sift` install does not expose an exclude flag, so scope searches to
first-party paths such as `docs/`, `src/`, `packages/*/src/`, `specs/`,
`AGENTS.md`, `CLAUDE.md`, and known design files. Avoid `node_modules/`,
`vendor/`, `dist/`, `build/`, `target/`, `.venv/`, caches, fixture dumps, and
generated lock/schema/output directories unless the dependency surface itself is
the subject of the question.

The targeted `sift` pass found:

- Codex `codex-rs/protocol/src/config_types.rs` and `protocol.rs`: confirms
  first-party typed approval policy and sandbox mode surfaces without searching
  `codex-rs/vendor/`.
- OpenCode `packages/opencode/specs/effect/schema.md`: confirms the
  schema-first rule to prefer one canonical schema definition and derive
  compatibility schemas instead of maintaining parallel sources of truth.
- Aider first-party docs/tests: confirm local repo-map/edit-format/lint/test
  and commit behavior surfaces.
- Plandex `README.md`, changelog, and first-party app model files: confirm the
  cumulative diff sandbox, controlled command execution, rollback/debug loop,
  and planning phases.
- Qwen Code `docs/`: confirms terminal-capture integration tests, trusted
  folders documentation, and provider configuration docs.
- RA.Aid first-party docs/source: confirms shell command approval bypass via
  `--cowboy-mode`, research/planning agents, and session/logging surfaces.
- Symphony first-party spec/workflow files: confirm issue-tracker polling,
  per-issue workspace isolation, repo-owned `WORKFLOW.md`, Codex app-server
  lifecycle, max turns/concurrency, retry/backoff, state snapshots, token/rate
  observability, PR feedback sweeps, and land-loop skills.
- CodeMachine first-party docs/templates: confirm local multi-agent
  orchestration, heterogeneous engine routing, spec-to-code workflow templates,
  feature-flag governance, health/status commands, and optional MCP tooling.
  GitHub upstream `moazbuilds/CodeMachine-CLI` confirms the public product
  framing: repeatable long-running workflows, multi-agent orchestration,
  parallel execution, context engineering, and headless scripting of coding
  engines such as Claude Code, Codex, Cursor, and others.
- ACE `AGENTS.md`: confirms the repo-local Claude MCP client contract, hard
  stops, skills, reviewer workflow, quality gate, and the warning that ACE's
  autonomous system uses its own code/YAML workflow DAGs rather than
@@ -249,6 +361,28 @@ The targeted `sift` pass found:
- Forge `docs/records/2026-05-07-cli-agent-code-survey.md`: now records the
  MCP-client-only product boundary and roadmap pull-through.

## Implementation Follow-Up

The first DB-backed retrieval slice landed with schema v41 (a field sketch
follows the list):

- `retrieval_evidence` records backend, source kind, query, strategy, scope,
  project root, git head/branch, worktree dirty flag, freshness, status, hit
  count, elapsed time, cache path, error, result metadata, and timestamp.
- `sift_search` and `codebase_search` write retrieval evidence for successful
  and failed searches.
- Native Context7 `resolve_library` and `get_library_docs` write docs retrieval
  evidence with `freshness=external-index`.
- `search-the-web` writes web retrieval evidence with `freshness=external-live`
  for success, cache hits, missing-provider errors, duplicate-loop stops,
  budget exhaustion, aborts, and provider failures.
- `sf_retrieval_evidence` exposes the rows through the SF read-only DB tool
  surface so agents do not query `.sf/sf.db` directly.
- Sift telemetry now uses the no-op debug logger; telemetry failures no longer
  turn successful searches into failed tool calls.
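
The row shape is easiest to read as a type. This is a non-authoritative sketch
assembled from the fields listed above and the status strings the tools in this
commit actually write; the real v41 column names and types may differ.

```ts
// Sketch only: field names are inferred from the survey text and tool calls
// in this commit, not copied from the v41 schema itself.
type RetrievalEvidenceRow = {
  backend: "context7" | "search-the-web" | string; // sift/codebase backend names assumed
  sourceKind: "code" | "docs" | "web";
  query: string;
  strategy: string; // e.g. "library-search", "docs-fetch"
  scope: string; // library ID, domain, or search scope
  projectRoot: string;
  gitHead?: string;
  gitBranch?: string;
  worktreeDirty?: boolean;
  freshness: "external-index" | "external-live" | string; // local-source value assumed
  status: "ok" | "empty" | "error" | "aborted" | "budget_exhausted" | "search_loop";
  hitCount: number;
  elapsedMs: number;
  cachePath?: string;
  error?: string;
  result?: Record<string, unknown>; // tool-specific metadata
  createdAt: string; // timestamp
};
```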

Next slices should wrap `search_and_read` and `fetch_page` results in the same
evidence contract before using them for planning, as sketched below.
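
As a sketch of that slice, a `fetch_page` wrapper could mirror the
`recordWebSearchEvidence` helper already used by `search-the-web`. Everything
here except `recordRetrievalEvidence` and the field vocabulary is an assumption
about the future tool surface, not landed code.

```ts
// Hypothetical next-slice wrapper: the outcome shape and the "fetch-page"
// strategy label are assumed; the evidence fields reuse this commit's contract.
async function recordFetchPageEvidence(
  projectRoot: string,
  url: string,
  startedAt: number,
  outcome: { ok: boolean; charCount?: number; error?: string },
): Promise<void> {
  await recordRetrievalEvidence(projectRoot, {
    backend: "search-the-web",
    sourceKind: "web",
    freshness: "external-live",
    query: url,
    strategy: "fetch-page", // assumed strategy label
    scope: new URL(url).hostname,
    status: outcome.ok ? "ok" : "error",
    hitCount: outcome.ok ? 1 : 0,
    elapsedMs: Date.now() - startedAt,
    error: outcome.error,
    result: outcome.ok ? { charCount: outcome.charCount } : undefined,
  });
}
```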

## Resulting Direction

Forge should absorb proven patterns into UOK and the existing DB-first runtime:
@@ -7,5 +7,5 @@ Repo-memory audits, decision ledgers, context-gardening notes, and records-keepe
| Date | Note | Summary |
|------|------|---------|
| 2026-05-01 | [repo-vcs and notifications](./2026-05-01-repo-vcs-and-notifications.md) | repo-vcs skill landed; notification specs drafted; JSDoc annotations added; placeholder docs filled |
| 2026-05-07 | [cli agent code survey](./2026-05-07-cli-agent-code-survey.md) | compared local CLI agent checkouts plus Context7 cross-checks; priority pulls are execution permissions, typed headless events, DB-first state, trust gating, cumulative diffs, eval pipelines, and MCP-client-only lifecycle hardening |
| 2026-05-07 | [SF + ACE full-stack reference survey](./2026-05-07-cli-agent-code-survey.md) | repo-wise map of coding agents, orchestration systems, retrieval tools, model references, and platform/golden-path systems; priority pulls are execution permissions, typed headless events, DB-first state, trust gating, orchestration, cumulative diffs, eval pipelines, and MCP-client-only lifecycle hardening |
| 2026-05-07 | [strategy alignment](./2026-05-07-strategy-alignment.md) | aligned top-level docs and roadmap framing around Forge as product, UOK as kernel, and external CLIs as sharpening inputs |
@@ -32,7 +32,7 @@ Step-by-step setup instructions for every LLM provider SF supports. If you ran t
|----------|-------------|-------------|-------------|
| Anthropic | API key | `ANTHROPIC_API_KEY` | — |
| OpenAI | API key | `OPENAI_API_KEY` | — |
| Google Gemini | Gemini CLI Core auth | — | `~/.gemini/oauth_creds.json` |
| Google Gemini | Gemini CLI Core auth (default) or API key when enabled in config | `GEMINI_API_KEY` | `~/.gemini/oauth_creds.json` |
| OpenRouter | API key | `OPENROUTER_API_KEY` | Optional `models.json` |
| Groq | API key | `GROQ_API_KEY` | — |
| xAI | API key | `XAI_API_KEY` | — |

@@ -92,8 +92,19 @@ Authenticate there once and let SF reuse the stored auth state.
gemini login
```

SF intentionally ignores ambient `GEMINI_API_KEY` and
`GOOGLE_GENERATIVE_AI_API_KEY` values for Forge runtime selection.
By default, Forge ignores ambient `GEMINI_API_KEY` and
`GOOGLE_GENERATIVE_AI_API_KEY` values for runtime selection. To allow env auth
for the direct `google` provider, set it in config:

```json
{
  "providerEnvAuth": {
    "providers": {
      "google": "on"
    }
  }
}
```
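
Note that this switch only covers the provider it names: with `google` set to
`on`, the `google-gemini-cli` provider still resolves auth from Gemini CLI Core
state, because the Gemini family defaults to `off` in `providerEnvAuth`.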

### OpenRouter
@@ -8,7 +8,7 @@ Step-by-step setup instructions for every LLM provider SF supports. If you ran t
|----------|-------------|---------------------|
| Anthropic | OAuth or API key | `ANTHROPIC_API_KEY` |
| OpenAI | API key | `OPENAI_API_KEY` |
| Google Gemini | Gemini CLI Core auth | `~/.gemini/oauth_creds.json` |
| Google Gemini | Gemini CLI Core auth (default) or API key when enabled in config | `GEMINI_API_KEY` |
| OpenRouter | API key | `OPENROUTER_API_KEY` |
| Groq | API key | `GROQ_API_KEY` |
| xAI (Grok) | API key | `XAI_API_KEY` |

@@ -58,8 +58,18 @@ Authenticate Gemini CLI Core once and let SF reuse that state:
gemini login
```

SF intentionally ignores `GEMINI_API_KEY` and `GOOGLE_GENERATIVE_AI_API_KEY`
for Forge runtime selection.
Forge ignores `GEMINI_API_KEY` and `GOOGLE_GENERATIVE_AI_API_KEY` by default.
To let the direct `google` provider use env auth, enable it in config:

```json
{
  "providerEnvAuth": {
    "providers": {
      "google": "on"
    }
  }
}
```

### OpenRouter
@@ -18,8 +18,8 @@
|----------|----------|
| `ANTHROPIC_API_KEY` | Anthropic (Claude) |
| `OPENAI_API_KEY` | OpenAI |
| `GEMINI_API_KEY` | Google Gemini (ignored by Forge runtime; Gemini CLI Core auth is used instead) |
| `GOOGLE_GENERATIVE_AI_API_KEY` | Google Gemini alias (ignored by Forge runtime) |
| `GEMINI_API_KEY` | Google Gemini (available to the direct `google` provider, but disabled by default via `providerEnvAuth`) |
| `GOOGLE_GENERATIVE_AI_API_KEY` | Google Gemini alias (same policy as `GEMINI_API_KEY`) |
| `OPENROUTER_API_KEY` | OpenRouter |
| `GROQ_API_KEY` | Groq |
| `XAI_API_KEY` | xAI (Grok) |
@@ -3,7 +3,7 @@ import { describe, it } from "vitest";
import { getEnvApiKey } from "./env-api-keys.js";

describe("getEnvApiKey", () => {
  it("ignores GEMINI_API_KEY for google when present", () => {
  it("uses GEMINI_API_KEY for google when present", () => {
    const savedGemini = process.env.GEMINI_API_KEY;
    const savedGoogleGenerative = process.env.GOOGLE_GENERATIVE_AI_API_KEY;

@@ -11,8 +11,7 @@ describe("getEnvApiKey", () => {
    process.env.GOOGLE_GENERATIVE_AI_API_KEY = "google-generative-key";

    try {
      assert.equal(getEnvApiKey("google"), undefined);
      assert.equal(getEnvApiKey("google-gemini-cli"), undefined);
      assert.equal(getEnvApiKey("google"), "gemini-key");
    } finally {
      if (savedGemini === undefined) delete process.env.GEMINI_API_KEY;
      else process.env.GEMINI_API_KEY = savedGemini;

@@ -22,7 +21,7 @@ describe("getEnvApiKey", () => {
    }
  });

  it("ignores GOOGLE_GENERATIVE_AI_API_KEY for google", () => {
  it("accepts GOOGLE_GENERATIVE_AI_API_KEY for google", () => {
    const savedGemini = process.env.GEMINI_API_KEY;
    const savedGoogleGenerative = process.env.GOOGLE_GENERATIVE_AI_API_KEY;

@@ -30,7 +29,7 @@ describe("getEnvApiKey", () => {
    process.env.GOOGLE_GENERATIVE_AI_API_KEY = "google-generative-key";

    try {
      assert.equal(getEnvApiKey("google"), undefined);
      assert.equal(getEnvApiKey("google"), "google-generative-key");
    } finally {
      if (savedGemini === undefined) delete process.env.GEMINI_API_KEY;
      else process.env.GEMINI_API_KEY = savedGemini;
@@ -73,13 +73,6 @@ function hasVertexAdcCredentials(): boolean {
export function getEnvApiKey(provider: KnownProvider): string | undefined;
export function getEnvApiKey(provider: string): string | undefined;
export function getEnvApiKey(provider: any): string | undefined {
  // Forge routes Gemini-family models through google-gemini-cli, which owns
  // auth via Gemini CLI Core state. Intentionally ignore Google API-key env vars
  // here so ambient GEMINI_API_KEY values do not change provider selection.
  if (provider === "google" || provider === "google-gemini-cli") {
    return undefined;
  }

  // Fall back to environment variables
  if (provider === "github-copilot") {
    return (

@@ -161,6 +154,7 @@ export function getEnvApiKey(provider: any): string | undefined {
  const envMap: Record<string, string | string[]> = {
    openai: "OPENAI_API_KEY",
    "azure-openai-responses": "AZURE_OPENAI_API_KEY",
    google: ["GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"],
    groq: "GROQ_API_KEY",
    cerebras: "CEREBRAS_API_KEY",
    xai: "XAI_API_KEY",
@@ -37,10 +37,6 @@ function hasVertexAdcCredentials(): boolean {
export function getEnvApiKey(provider: KnownProvider): string | undefined;
export function getEnvApiKey(provider: string): string | undefined;
export function getEnvApiKey(provider: string): string | undefined {
  if (provider === "google" || provider === "google-gemini-cli") {
    return undefined;
  }

  if (provider === "github-copilot") {
    return (
      process.env.COPILOT_GITHUB_TOKEN ||

@@ -93,6 +89,7 @@ export function getEnvApiKey(provider: string): string | undefined {
  const envMap: Record<string, string | string[]> = {
    openai: "OPENAI_API_KEY",
    "azure-openai-responses": "AZURE_OPENAI_API_KEY",
    google: ["GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"],
    groq: "GROQ_API_KEY",
    cerebras: "CEREBRAS_API_KEY",
    xai: "XAI_API_KEY",
@@ -32,6 +32,7 @@ import { getAgentDir } from "../config.js";
import { AUTH_LOCK_STALE_MS } from "./constants.js";
import { acquireLockAsync, acquireLockSyncWithRetry } from "./lock-utils.js";
import { resolveConfigValueAsync } from "./resolve-config-value.js";
import type { ProviderEnvAuthMode } from "./settings-manager.js";

export type ApiKeyCredential = {
  type: "api_key";

@@ -266,6 +267,7 @@ export class AuthStorage {
  private data: AuthStorageData = {};
  private runtimeOverrides: Map<string, string> = new Map();
  private fallbackResolver?: (provider: string) => string | undefined;
  private envAuthModeResolver?: (provider: string) => ProviderEnvAuthMode;
  private loadError: Error | null = null;
  private errors: Error[] = [];
  private credentialChangeListeners: Set<() => void> = new Set();

@@ -337,6 +339,12 @@ export class AuthStorage {
    this.fallbackResolver = resolver;
  }

  setEnvAuthModeResolver(
    resolver: (provider: string) => ProviderEnvAuthMode,
  ): void {
    this.envAuthModeResolver = resolver;
  }

  /**
   * Register a callback to be notified when credentials change (e.g., after OAuth token refresh).
   * Returns a function to unregister the listener.

@@ -500,7 +508,7 @@ export class AuthStorage {
  hasAuth(provider: string): boolean {
    if (this.runtimeOverrides.has(provider)) return true;
    if (this.data[provider]) return true;
    if (getEnvApiKey(provider)) return true;
    if (this.getConfiguredEnvApiKey(provider)) return true;
    if (this.fallbackResolver?.(provider)) return true;
    return false;
  }

@@ -982,9 +990,8 @@ export class AuthStorage {
      // All credentials backed off or unresolvable - fall through to env/fallback
    }

    // Fall back to environment variable. Gemini-family providers intentionally
    // ignore ambient GEMINI_API_KEY values via getEnvApiKey().
    const envKey = getEnvApiKey(providerId);
    // Fall back to environment variable when provider policy allows it.
    const envKey = this.getConfiguredEnvApiKey(providerId);
    if (envKey) {
      // Block Google OAuth tokens from environment variables (e.g., GEMINI_API_KEY=ya29.*)
      if (

@@ -1007,6 +1014,16 @@ export class AuthStorage {
    return this.fallbackResolver?.(providerId) ?? undefined;
  }

  private getConfiguredEnvApiKey(provider: string): string | undefined {
    const mode =
      this.envAuthModeResolver?.(provider) ??
      (provider === "google" || provider === "google-gemini-cli"
        ? "off"
        : "auto");
    if (mode === "off") return undefined;
    return getEnvApiKey(provider);
  }

  /**
   * Get all registered OAuth providers
   */
@@ -82,14 +82,13 @@ function createResolver(overrides?: {
// ─── findFallback ────────────────────────────────────────────────────────────

describe("FallbackResolver — findFallback", () => {
  it("returns next available provider when current fails", async () => {
  it("reselects from the current available models when current fails", async () => {
    const { resolver } = createResolver();
    const result = await resolver.findFallback(zaiModel, "quota_exhausted");

    assert.notEqual(result, null);
    assert.equal(result!.model.provider, "alibaba");
    assert.equal(result!.model.id, "glm-5");
    assert.equal(result!.chainName, "coding");
    assert.equal(result!.chainName, "fresh-selection");
  });

  it("marks current provider as exhausted for rate_limit errors", async () => {

@@ -142,12 +141,12 @@ describe("FallbackResolver — findFallback", () => {
    assert.equal(result, null);
  });

  it("falls back to free selection when model is not in any chain", async () => {
  it("reselects from scratch when model is not in any chain", async () => {
    const { resolver } = createResolver();
    const unknownModel = createMockModel("unknown", "some-model");
    const result = await resolver.findFallback(unknownModel, "quota_exhausted");
    assert.notEqual(result, null);
    assert.equal(result!.chainName, "free-selection");
    assert.equal(result!.chainName, "fresh-selection");
    // Should pick an available model with different provider
    assert.notEqual(result!.model.provider, "unknown");
  });

@@ -208,11 +207,7 @@ describe("FallbackResolver — findFallback", () => {

  it("skips providers with no model in registry", async () => {
    const { resolver } = createResolver({
      find: (provider: string, modelId: string) => {
        if (provider === "alibaba") return undefined;
        if (provider === "openai" && modelId === "gpt-4.1") return openaiModel;
        return undefined;
      },
      getAvailable: () => [openaiModel],
    });

    const result = await resolver.findFallback(zaiModel, "quota_exhausted");

@@ -225,33 +220,9 @@ describe("FallbackResolver — findFallback", () => {
// ─── checkForRestoration ─────────────────────────────────────────────────────

describe("FallbackResolver — checkForRestoration", () => {
  it("returns higher-priority provider when recovered", async () => {
  it("returns null because restoration is disabled", async () => {
    const { resolver } = createResolver();
    const result = await resolver.checkForRestoration(alibabaModel);

    assert.notEqual(result, null);
    assert.equal(result!.model.provider, "zai");
    assert.equal(result!.model.id, "glm-5");
  });

  it("returns null when already at highest priority", async () => {
    const { resolver } = createResolver();
    const result = await resolver.checkForRestoration(zaiModel);
    assert.equal(result, null);
  });

  it("returns null when higher-priority provider is still backed off", async () => {
    const { resolver } = createResolver({
      isProviderAvailable: (provider: string) => provider !== "zai",
    });

    const result = await resolver.checkForRestoration(alibabaModel);
    assert.equal(result, null);
  });

  it("returns null when fallback is disabled", async () => {
    const { resolver } = createResolver({ enabled: false });
    const result = await resolver.checkForRestoration(alibabaModel);
    assert.equal(result, null);
  });
});
@@ -2,11 +2,10 @@
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>

/**
 * FallbackResolver - Cross-provider fallback when rate/quota limits are hit.
 * FallbackResolver - Fresh model reselection when rate/quota limits are hit.
 *
 * When a provider's credentials are all exhausted, this resolver finds the next
 * available provider+model from a user-configured fallback chain. It also handles
 * restoration: checking if a higher-priority provider has recovered before each request.
 * When a provider/model becomes unhealthy, this resolver picks a fresh model from
 * the current available registry rather than walking a preconfigured fallback chain.
 */

import type { Api, Model } from "@singularity-forge/pi-ai";

@@ -31,20 +30,16 @@ export class FallbackResolver {
  ) {}

  /**
   * Find the next available fallback for a model that just failed.
   * Searches all chains for entries matching the current model's provider+id,
   * then returns the next available entry with lower priority (higher number).
   * Find a fresh replacement for a model that just failed.
   * Ignores fallback chains and reselects from the current available registry.
   *
   * If no chain contains the current model, falls through to free selection:
   * picks any available model from the registry with a different provider.
   *
   * @returns FallbackResult if a fallback is available, null otherwise
   * @returns FallbackResult if a replacement is available, null otherwise
   */
  async findFallback(
    currentModel: Model<Api>,
    errorType: UsageLimitErrorType,
  ): Promise<FallbackResult | null> {
    const { enabled, chains } = this.settingsManager.getFallbackSettings();
    const { enabled } = this.settingsManager.getFallbackSettings();
    if (!enabled) return null;

    // Mark the current provider as exhausted at the provider level.

@@ -55,75 +50,16 @@ export class FallbackResolver {
      this.authStorage.markProviderExhausted(currentModel.provider, errorType);
    }

    // Search all chains for one containing the current model
    for (const [chainName, entries] of Object.entries(chains)) {
      const currentIndex = entries.findIndex(
        (e) =>
          e.provider === currentModel.provider && e.model === currentModel.id,
      );

      if (currentIndex === -1) continue;

      // Try entries after the current one (already sorted by priority)
      const result = await this._findAvailableInChain(
        chainName,
        entries,
        currentIndex + 1,
      );
      if (result) return result;

      // Wrap around: try entries before the current one
      const wrapResult = await this._findAvailableInChain(
        chainName,
        entries,
        0,
        currentIndex,
      );
      if (wrapResult) return wrapResult;
    }

    // No chain contained the current model — fall through to free selection
    // from any available model in the registry with a different provider.
    return this._findAnyAvailableFallback(currentModel);
  }

  /**
   * Check if a higher-priority provider in the chain has recovered.
   * Called before each LLM request to restore the best available provider.
   *
   * @returns FallbackResult if a better provider is available, null if current is best
   * Automatic restoration is disabled when replacement is always reselected
   * from scratch instead of following a chain.
   */
  async checkForRestoration(
    currentModel: Model<Api>,
    _currentModel: Model<Api>,
  ): Promise<FallbackResult | null> {
    const { enabled, chains } = this.settingsManager.getFallbackSettings();
    if (!enabled) return null;

    for (const [chainName, entries] of Object.entries(chains)) {
      const currentIndex = entries.findIndex(
        (e) =>
          e.provider === currentModel.provider && e.model === currentModel.id,
      );

      if (currentIndex === -1) continue;

      // Only check entries with higher priority (lower index = higher priority)
      if (currentIndex === 0) continue; // Already at highest priority

      const result = await this._findAvailableInChain(
        chainName,
        entries,
        0,
        currentIndex,
      );
      if (result) {
        return {
          ...result,
          reason: `${result.model.provider}/${result.model.id} recovered, restoring from fallback`,
        };
      }
    }

    return null;
  }

@@ -227,8 +163,8 @@ export class FallbackResolver {
    const chosen = candidates[0];
    return {
      model: chosen,
      chainName: "free-selection",
      reason: `free fallback to ${chosen.provider}/${chosen.id} (no chain configured)`,
      chainName: "fresh-selection",
      reason: `reselected ${chosen.provider}/${chosen.id} from available models`,
    };
  }
}
@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
import { describe, it } from "vitest";
import type { AuthStorage } from "./auth-storage.js";
import { ModelRegistry } from "./model-registry.js";
import { type Settings, SettingsManager } from "./settings-manager.js";

function createRegistryWithCapturedResolver() {
  let capturedResolver: ((provider: string) => string | undefined) | undefined;

@@ -11,6 +12,7 @@ function createRegistryWithCapturedResolver() {
    ) => {
      capturedResolver = resolver;
    },
    setEnvAuthModeResolver: () => {},
    onCredentialChange: () => {},
    getOAuthProviders: () => [],
    get: () => undefined,

@@ -26,6 +28,29 @@ function createRegistryWithCapturedResolver() {
  return capturedResolver!;
}

function createRegistryWithSettingsAndCapturedResolver(
  settings: Partial<Settings>,
) {
  let capturedResolver: ((provider: string) => string | undefined) | undefined;
  const authStorage = {
    setFallbackResolver: (
      resolver: (provider: string) => string | undefined,
    ) => {
      capturedResolver = resolver;
    },
    setEnvAuthModeResolver: () => {},
    onCredentialChange: () => {},
    getOAuthProviders: () => [],
    get: () => undefined,
    hasAuth: () => false,
    getApiKey: async () => undefined,
  } as unknown as AuthStorage;

  new ModelRegistry(authStorage, undefined, SettingsManager.inMemory(settings));
  assert.ok(capturedResolver);
  return capturedResolver!;
}

describe("ModelRegistry env fallback resolver (#3782)", () => {
  it("falls back to built-in provider env vars when models.json has no custom key", () => {
    const prev = process.env.MINIMAX_API_KEY;

@@ -61,4 +86,38 @@ describe("ModelRegistry env fallback resolver (#3782)", () => {
      }
    }
  });

  it("disables google env fallback by default", () => {
    const prev = process.env.GEMINI_API_KEY;
    process.env.GEMINI_API_KEY = "gemini-env-test-key";

    try {
      const resolver = createRegistryWithSettingsAndCapturedResolver({});
      assert.equal(resolver("google"), undefined);
    } finally {
      if (prev === undefined) {
        delete process.env.GEMINI_API_KEY;
      } else {
        process.env.GEMINI_API_KEY = prev;
      }
    }
  });

  it("allows provider env fallback when providerEnvAuth is on", () => {
    const prev = process.env.GEMINI_API_KEY;
    process.env.GEMINI_API_KEY = "gemini-env-test-key";

    try {
      const resolver = createRegistryWithSettingsAndCapturedResolver({
        providerEnvAuth: { providers: { google: "on" } },
      });
      assert.equal(resolver("google"), "gemini-env-test-key");
    } finally {
      if (prev === undefined) {
        delete process.env.GEMINI_API_KEY;
      } else {
        process.env.GEMINI_API_KEY = prev;
      }
    }
  });
});
@@ -486,6 +486,19 @@ export class ModelRegistry {
    discoveryCache?: ModelDiscoveryCache,
  ) {
    this.discoveryCache = discoveryCache ?? new ModelDiscoveryCache();
    (
      this.authStorage as {
        setEnvAuthModeResolver?: (
          resolver: (provider: string) => string,
        ) => void;
      }
    ).setEnvAuthModeResolver?.(
      (provider) =>
        this.settingsManager?.getProviderEnvAuthMode(provider) ??
        (provider === "google" || provider === "google-gemini-cli"
          ? "off"
          : "auto"),
    );

    // Set up fallback resolver for custom provider API keys
    this.authStorage.setFallbackResolver((provider) => {

@@ -493,6 +506,14 @@ export class ModelRegistry {
      if (keyConfig) {
        return resolveConfigValue(keyConfig);
      }
      if (
        (this.settingsManager?.getProviderEnvAuthMode(provider) ??
          (provider === "google" || provider === "google-gemini-cli"
            ? "off"
            : "auto")) === "off"
      ) {
        return undefined;
      }
      return getEnvApiKey(provider);
    });
@@ -206,8 +206,8 @@ export class RetryHandler {
      }
    }

    // Cross-provider fallback — for rate limits with all creds backed off,
    // quota errors, or auth errors (invalid/expired key — no point retrying).
    // Fresh model reselection — for rate limits, quota errors, or auth errors
    // once the same-model retry budget has been meaningfully exercised.
    const isAuthError = errorType === "auth_error";
    if (isRateLimit || isQuotaError || isAuthError) {
      // For quota errors with a retry-after hint, wait before switching providers.

@@ -260,67 +260,39 @@ export class RetryHandler {
          return true;
        }
      }

      const fallbackResult = await this._deps.fallbackResolver.findFallback(
        this._deps.getModel()!,
        errorType,
      );

      if (fallbackResult) {
        const previousProvider = this._deps.getModel()!.provider;
        this._deps.agent.setModel(fallbackResult.model);
        this._deps.onModelChange(fallbackResult.model);
        this._removeLastAssistantError();

        this._deps.emit({
          type: "fallback_provider_switch",
          from: `${previousProvider}/${this._deps.getModel()?.id}`,
          to: `${fallbackResult.model.provider}/${fallbackResult.model.id}`,
          reason: fallbackResult.reason,
        });

        this._deps.emit({
          type: "auto_retry_start",
          attempt: this._retryAttempt + 1,
          maxAttempts: settings.maxRetries,
          delayMs: 0,
          errorMessage: `${message.errorMessage} (${fallbackResult.reason})`,
        });

        // Retry immediately with fallback provider - don't increment _retryAttempt
        this._scheduleContinue(retryGeneration);

        return true;
      }

      // No fallback available either
      if (isQuotaError) {
        // Try long-context model downgrade ([1m] → base) before giving up
        const downgraded = this._tryLongContextDowngrade(
      const provider = this._deps.getModel()!.provider;
      const authMode = this._deps.modelRegistry.getProviderAuthMode(provider);
      const shouldReselectImmediately =
        isQuotaError ||
        isAuthError ||
        this._isCapacityError(message.errorMessage) ||
        (isRateLimit && authMode === "externalCli");
      if (shouldReselectImmediately) {
        return this._tryFreshModelSelection(
          message,
          errorType,
          retryGeneration,
        );
        if (downgraded) return true;

        this._deps.emit({
          type: "fallback_chain_exhausted",
          reason: `All providers exhausted for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`,
        });
        this._deps.emit({
          type: "auto_retry_end",
          success: false,
          attempt: this._retryAttempt,
          finalError: message.errorMessage,
        });
        this._retryAttempt = 0;
        this._resolveRetry();
        return false;
      }
    }
  }

  this._retryAttempt++;

  const errorType = message.errorMessage
    ? this._classifyErrorType(message.errorMessage)
    : "unknown";
  const isRateLimit = errorType === "rate_limit";
  const isQuotaError = errorType === "quota_exhausted";
  const isAuthError = errorType === "auth_error";
  const reselectionThreshold = Math.min(settings.maxRetries, 3);
  if (
    (isRateLimit || isQuotaError || isAuthError) &&
    this._retryAttempt >= reselectionThreshold
  ) {
    return this._tryFreshModelSelection(message, errorType, retryGeneration);
  }

  if (this._retryAttempt > settings.maxRetries) {
    this._deps.emit({
      type: "auto_retry_end",

@@ -515,6 +487,72 @@ export class RetryHandler {
    return "unknown";
  }

  private _isCapacityError(errorMessage: string): boolean {
    return /no capacity|capacity.*available|server.*busy|too busy/i.test(
      errorMessage,
    );
  }

  private async _tryFreshModelSelection(
    message: AssistantMessage,
    errorType: UsageLimitErrorType,
    retryGeneration: number,
  ): Promise<boolean> {
    const replacement = await this._deps.fallbackResolver.findFallback(
      this._deps.getModel()!,
      errorType,
    );

    if (replacement) {
      const previousModel = this._deps.getModel()!;
      this._deps.agent.setModel(replacement.model);
      this._deps.onModelChange(replacement.model);
      this._removeLastAssistantError();

      this._deps.emit({
        type: "fallback_provider_switch",
        from: `${previousModel.provider}/${previousModel.id}`,
        to: `${replacement.model.provider}/${replacement.model.id}`,
        reason: replacement.reason,
      });

      this._deps.emit({
        type: "auto_retry_start",
        attempt: Math.max(this._retryAttempt, 1),
        maxAttempts: this._deps.settingsManager.getRetrySettings().maxRetries,
        delayMs: 0,
        errorMessage: `${message.errorMessage} (${replacement.reason})`,
      });

      this._scheduleContinue(retryGeneration);
      return true;
    }

    if (errorType === "quota_exhausted") {
      const downgraded = this._tryLongContextDowngrade(
        message,
        retryGeneration,
      );
      if (downgraded) return true;

      this._deps.emit({
        type: "fallback_chain_exhausted",
        reason: `No replacement model available for ${this._deps.getModel()!.provider}/${this._deps.getModel()!.id}`,
      });
      this._deps.emit({
        type: "auto_retry_end",
        success: false,
        attempt: this._retryAttempt,
        finalError: message.errorMessage,
      });
      this._retryAttempt = 0;
      this._resolveRetry();
      return false;
    }

    return false;
  }

  /**
   * Attempt a same-model retry by reducing maxTokens when provider reports
   * an affordability cap (e.g., "can only afford 329").
@@ -1002,7 +1002,9 @@ export class SettingsManager {
    return (
      this.settings.providerEnvAuth?.providers?.[provider] ??
      this.settings.providerEnvAuth?.default ??
      "auto"
      (provider === "google" || provider === "google-gemini-cli"
        ? "off"
        : "auto")
    );
  }
@@ -13,7 +13,7 @@ import type { ModelRegistry } from "../../../core/model-registry.js";
import type { SettingsManager } from "../../../core/settings-manager.js";
import { theme } from "../theme/theme.js";
import { DynamicBorder } from "./dynamic-border.js";
import { keyHint } from "./keybinding-hints.js";
import { keyHint, rawKeyHint } from "./keybinding-hints.js";

/** Display names for providers in the model selector UI. */
const PROVIDER_DISPLAY_NAMES: Record<string, string> = {

@@ -348,7 +348,7 @@ export class ModelSelectorComponent extends Container implements Focusable {
    return (
      keyHint("tab", "scope") +
      theme.fg("muted", " (all/scoped) ") +
      keyHint("d", "disable")
      rawKeyHint("d", "disable")
    );
  }
@@ -29,6 +29,7 @@ import {
  truncateHead,
} from "@singularity-forge/pi-coding-agent";
import { Text } from "@singularity-forge/pi-tui";
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";

// ─── In-session cache ─────────────────────────────────────────────────────────
// Keyed by lowercased query string

@@ -133,9 +134,33 @@ export default function (pi) {
        ),
      }),
      async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
        const startedAt = Date.now();
        const projectRoot = process.cwd();
        const cacheKey = params.libraryName.toLowerCase().trim();
        if (searchCache.has(cacheKey)) {
          const cached = searchCache.get(cacheKey);
          await recordRetrievalEvidence(projectRoot, {
            backend: "context7",
            sourceKind: "docs",
            query: params.query
              ? `${params.libraryName} ${params.query}`
              : params.libraryName,
            strategy: "library-search",
            scope: params.libraryName,
            freshness: "external-index",
            status: "ok",
            hitCount: cached.length,
            elapsedMs: Date.now() - startedAt,
            result: {
              cached: true,
              libraries: cached.map((lib) => ({
                id: lib.id,
                title: lib.title,
                trustScore: lib.trustScore,
                benchmarkScore: lib.benchmarkScore,
              })),
            },
          });
          return {
            content: [
              {

@@ -159,6 +184,20 @@ export default function (pi) {
          libs = Array.isArray(data?.results) ? data.results : [];
        } catch (err) {
          const msg = err instanceof Error ? err.message : String(err);
          await recordRetrievalEvidence(projectRoot, {
            backend: "context7",
            sourceKind: "docs",
            query: params.query
              ? `${params.libraryName} ${params.query}`
              : params.libraryName,
            strategy: "library-search",
            scope: params.libraryName,
            freshness: "external-index",
            status: "error",
            hitCount: 0,
            elapsedMs: Date.now() - startedAt,
            error: msg,
          });
          return {
            content: [{ type: "text", text: `Context7 search failed: ${msg}` }],
            isError: true,

@@ -171,6 +210,28 @@ export default function (pi) {
          };
        }
        searchCache.set(cacheKey, libs);
        await recordRetrievalEvidence(projectRoot, {
          backend: "context7",
          sourceKind: "docs",
          query: params.query
            ? `${params.libraryName} ${params.query}`
            : params.libraryName,
          strategy: "library-search",
          scope: params.libraryName,
          freshness: "external-index",
          status: "ok",
          hitCount: libs.length,
          elapsedMs: Date.now() - startedAt,
          result: {
            cached: false,
            libraries: libs.map((lib) => ({
              id: lib.id,
              title: lib.title,
              trustScore: lib.trustScore,
              benchmarkScore: lib.benchmarkScore,
            })),
          },
        });
        return {
          content: [
            { type: "text", text: formatLibraryList(libs, params.libraryName) },

@@ -246,6 +307,8 @@ export default function (pi) {
        ),
      }),
      async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
        const startedAt = Date.now();
        const projectRoot = process.cwd();
        const tokens = Math.min(Math.max(params.tokens ?? 5000, 500), 10000);
        // Strip accidental leading @ that some models inject
        const libraryId = params.libraryId.startsWith("@")

@@ -255,6 +318,22 @@ export default function (pi) {
        const cacheKey = `${libraryId}::${query ?? ""}::${tokens}`;
        if (docCache.has(cacheKey)) {
          const cached = docCache.get(cacheKey);
          await recordRetrievalEvidence(projectRoot, {
            backend: "context7",
            sourceKind: "docs",
            query: query ?? "",
            strategy: "docs-fetch",
            scope: libraryId,
            freshness: "external-index",
            status: "ok",
            hitCount: cached.trim() ? 1 : 0,
            elapsedMs: Date.now() - startedAt,
            result: {
              cached: true,
              tokens,
              charCount: cached.length,
            },
          });
          return {
            content: [{ type: "text", text: cached }],
            details: {

@@ -276,6 +355,19 @@ export default function (pi) {
          rawText = await apiFetchText(url.toString(), signal);
        } catch (err) {
          const msg = err instanceof Error ? err.message : String(err);
          await recordRetrievalEvidence(projectRoot, {
            backend: "context7",
            sourceKind: "docs",
            query: query ?? "",
            strategy: "docs-fetch",
            scope: libraryId,
            freshness: "external-index",
            status: "error",
            hitCount: 0,
            elapsedMs: Date.now() - startedAt,
            error: msg,
            result: { tokens },
          });
          return {
            content: [
              { type: "text", text: `Context7 doc fetch failed: ${msg}` },

@@ -296,6 +388,18 @@ export default function (pi) {
        const notFound = query
          ? `No documentation found for "${query}" in ${libraryId}. Try a broader query or different library ID.`
          : `No documentation found for ${libraryId}. Try resolve_library to verify the library ID.`;
        await recordRetrievalEvidence(projectRoot, {
          backend: "context7",
          sourceKind: "docs",
          query: query ?? "",
          strategy: "docs-fetch",
          scope: libraryId,
          freshness: "external-index",
          status: "empty",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          result: { tokens },
        });
        return {
          content: [{ type: "text", text: notFound }],
          details: {

@@ -321,6 +425,23 @@ export default function (pi) {
            ` Use a more specific query to reduce output size.]`;
        }
        docCache.set(cacheKey, finalText);
        await recordRetrievalEvidence(projectRoot, {
          backend: "context7",
          sourceKind: "docs",
          query: query ?? "",
          strategy: "docs-fetch",
          scope: libraryId,
          freshness: "external-index",
          status: "ok",
          hitCount: 1,
          elapsedMs: Date.now() - startedAt,
          result: {
            cached: false,
            tokens,
            truncated: truncation.truncated,
            charCount: finalText.length,
          },
        });
        return {
          content: [{ type: "text", text: finalText }],
          details: {
@@ -18,6 +18,7 @@ import {
  truncateHead,
} from "@singularity-forge/pi-coding-agent";
import { Text } from "@singularity-forge/pi-tui";
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";
import { LRUTTLCache } from "./cache.js";
import { formatSearchResults } from "./format.js";
import { classifyError, fetchWithRetry, fetchWithRetryTimed } from "./http.js";

@@ -58,6 +59,21 @@ export function resetSearchLoopGuardState() {
  consecutiveDupeCount = 0;
  sessionTotalSearches = 0;
}

function summarizeWebResults(results) {
  return results.map((result) => ({
    title: result.title,
    url: result.url,
    age: result.age,
  }));
}

async function recordWebSearchEvidence(projectRoot, entry) {
  await recordRetrievalEvidence(projectRoot, {
    backend: "search-the-web",
    sourceKind: "web",
    freshness: "external-live",
    ...entry,
  });
}

// Summarizer responses: max 50 entries, 15-minute TTL
const summarizerCache = new LRUTTLCache({ max: 50, ttlMs: 900_000 });

// =============================================================================

@@ -575,7 +591,18 @@ export function registerSearchTool(pi) {
      ),
    }),
    async execute(_toolCallId, params, signal, onUpdate, _ctx) {
      const projectRoot = process.cwd();
      const startedAt = Date.now();
      if (signal?.aborted) {
        await recordWebSearchEvidence(projectRoot, {
          query: params.query ?? "",
          strategy: "aborted",
          scope: params.domain ?? "",
          status: "aborted",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          error: "Search cancelled",
        });
        return {
          content: [{ type: "text", text: "Search cancelled." }],
          details: undefined,

@@ -586,6 +613,15 @@ export function registerSearchTool(pi) {
      // ------------------------------------------------------------------
      const provider = resolveSearchProvider();
      if (!provider) {
        await recordWebSearchEvidence(projectRoot, {
          query: params.query,
          strategy: "none",
          scope: params.domain ?? "",
          status: "error",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          error: "No search API key set",
        });
        return {
          content: [
            {

@@ -604,6 +640,19 @@ export function registerSearchTool(pi) {
      // Session-level search budget
      // ------------------------------------------------------------------
      if (sessionTotalSearches >= MAX_SEARCHES_PER_SESSION) {
        await recordWebSearchEvidence(projectRoot, {
          query: params.query,
          strategy: provider,
          scope: params.domain ?? "",
          status: "budget_exhausted",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          error: `Session search budget exhausted (${MAX_SEARCHES_PER_SESSION})`,
          result: {
            sessionTotalSearches,
            maxSearches: MAX_SEARCHES_PER_SESSION,
          },
        });
        return {
          content: [
            {

@@ -661,6 +710,16 @@ export function registerSearchTool(pi) {
      if (cacheKey === lastSearchKey) {
        consecutiveDupeCount++;
        if (consecutiveDupeCount > MAX_CONSECUTIVE_DUPES) {
          await recordWebSearchEvidence(projectRoot, {
            query: params.query,
            strategy: provider,
            scope: params.domain ?? "",
            status: "search_loop",
            hitCount: 0,
            elapsedMs: Date.now() - startedAt,
            error: "Consecutive duplicate search detected",
            result: { consecutiveDupeCount },
          });
          return {
            content: [
              {

@@ -727,6 +786,21 @@ export function registerSearchTool(pi) {
          moreResultsAvailable: cached.moreResultsAvailable,
          provider,
        };
        await recordWebSearchEvidence(projectRoot, {
          query: params.query,
          strategy: provider,
          scope: params.domain ?? "",
          status: "ok",
          hitCount: limited.length,
          elapsedMs: Date.now() - startedAt,
          result: {
            cached: true,
            effectiveQuery,
            freshness: freshness || "none",
            hasSummary: !!summaryText,
            results: summarizeWebResults(limited),
          },
        });
        return { content: [{ type: "text", text: content }], details };
      }
      onUpdate?.({

@@ -864,9 +938,38 @@ export function registerSearchTool(pi) {
          moreResultsAvailable: searchResult.moreResultsAvailable,
          provider,
        };
        await recordWebSearchEvidence(projectRoot, {
          query: params.query,
          strategy: provider,
          scope: params.domain ?? "",
          status: "ok",
          hitCount: results.length,
          elapsedMs: Date.now() - startedAt,
          result: {
            cached: false,
            effectiveQuery,
            freshness: freshness || "none",
            hasSummary: !!summaryText,
            latencyMs,
            results: summarizeWebResults(results),
          },
        });
        return { content: [{ type: "text", text: content }], details };
      } catch (error) {
        const classified = classifyError(error);
        await recordWebSearchEvidence(projectRoot, {
          query: params.query,
          strategy: provider,
          scope: params.domain ?? "",
          status: "error",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          error: classified.message,
          result: {
            errorKind: classified.kind,
            retryAfterMs: classified.retryAfterMs,
          },
        });
        return {
          content: [
            { type: "text", text: `Search failed: ${classified.message}` },
@@ -1,5 +1,6 @@
// SF2 — Read-only query tools exposing DB state to the LLM via the WAL connection
import { Type } from "@sinclair/typebox";
import { getRetrievalEvidence } from "../sf-db.js";
import { executeMilestoneStatus } from "../tools/workflow-tool-executors.js";
import { ensureDbOpen } from "./dynamic-tools.js";
export function registerQueryTools(pi) {

@@ -36,4 +37,59 @@ export function registerQueryTools(pi) {
      return executeMilestoneStatus(params);
    },
  });
  pi.registerTool({
    name: "sf_retrieval_evidence",
    label: "Retrieval Evidence",
    description:
      "Read recent retrieval provenance from the SF database. Returns source backend, query, scope, freshness, status, and result metadata. " +
      "Use this instead of querying .sf/sf.db directly when auditing Sift, codebase_search, Context7, or web-derived context.",
    promptSnippet:
      "Inspect recent retrieval evidence rows with backend, scope, freshness, and hit counts",
    promptGuidelines: [
      "Use this to verify whether context came from live source, stale indexed docs, or another retrieval backend before trusting it.",
      "Prefer rows with backend=sift and freshness=working-tree/git-head for implementation decisions.",
    ],
    parameters: Type.Object({
      limit: Type.Optional(
        Type.Number({
          description: "Maximum number of retrieval evidence rows to return.",
          default: 20,
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      const dbAvailable = await ensureDbOpen();
      if (!dbAvailable) {
        return {
          content: [
            {
              type: "text",
              text: "Error: SF database is not available. Cannot read retrieval evidence.",
            },
          ],
          details: {
            operation: "retrieval_evidence",
            error: "db_unavailable",
          },
        };
      }
      const limit = Math.max(1, Math.min(100, params.limit ?? 20));
      const rows = getRetrievalEvidence(limit);
      const lines = [`Retrieval evidence: ${rows.length} row(s)`, ""];
      for (const row of rows) {
        lines.push(
          `- #${row.id} ${row.backend}/${row.sourceKind} ${row.status} ` +
            `freshness=${row.freshness} scope=${row.scope || "."} hits=${row.hitCount} ` +
            `query="${row.query}"`,
        );
      }
      return {
        content: [{ type: "text", text: lines.join("\n") }],
        details: {
          operation: "retrieval_evidence",
          rows,
        },
      };
    },
  });
}
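A minimal sketch of the audit the promptGuidelines describe, assuming a caller that already holds the `details.rows` payload returned above; the `trustworthyRows` helper name is illustrative, not part of this commit:

```js
// Illustrative filter: keep only rows whose provenance is safe for
// implementation decisions, per the guideline
// "backend=sift and freshness=working-tree/git-head".
function trustworthyRows(rows) {
  return rows.filter(
    (row) =>
      row.backend === "sift" &&
      row.status === "ok" &&
      (row.freshness === "working-tree" || row.freshness === "git-head"),
  );
}
```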
@@ -11,10 +11,10 @@
 * - Optional search/tool integrations (Brave, Tavily, Jina, Context7)
 */
import { existsSync } from "node:fs";
-import { getEnvApiKey } from "@singularity-forge/pi-ai";
import { AuthStorage } from "@singularity-forge/pi-coding-agent";
import { getAuthPath, PROVIDER_REGISTRY } from "./key-manager.js";
import { loadEffectiveSFPreferences } from "./preferences.js";
+import { getConfiguredEnvApiKey } from "./provider-env-auth.js";
import {
  couldBeVaultUri,
  hasProviderCredentialEnvVar,

@@ -141,7 +141,7 @@ function resolveKey(providerId) {
  // Check environment variable using the authoritative env var resolution
  // (handles multi-var lookups like ANTHROPIC_OAUTH_TOKEN || ANTHROPIC_API_KEY,
  // COPILOT_GITHUB_TOKEN || GH_TOKEN || GITHUB_TOKEN, Vertex ADC, Bedrock, etc.)
-  if (getEnvApiKey(providerId)) {
+  if (getConfiguredEnvApiKey(providerId)) {
    return { found: true, source: "env", backedOff: false };
  }
  // Check for vault:// URIs in env vars (late-binding resolution)

@@ -278,6 +278,7 @@ function checkLlmProviders() {
        label,
        category: "llm",
        status: "ok",
        source: lookup.source,
        message: `${label} — key present (${lookup.source})`,
        required: true,
      });
@@ -31,6 +31,7 @@
  "sf_replan_slice",
  "sf_requirement_save",
  "sf_requirement_update",
  "sf_retrieval_evidence",
  "sf_resume",
  "sf_save_gate_result",
  "sf_self_feedback_resolve",
@@ -8,11 +8,7 @@
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
-import {
-  getEnvApiKey,
-  getModels,
-  getProviders,
-} from "@singularity-forge/pi-ai";
+import { getModels, getProviders } from "@singularity-forge/pi-ai";
import {
  DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
  DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,

@@ -26,6 +22,7 @@ import {
  getGlobalSFPreferencesPath,
  loadEffectiveSFPreferences,
} from "./preferences.js";
import { getConfiguredEnvApiKey } from "./provider-env-auth.js";

const OPENCODE_FREE_MODEL_IDS = new Set([
  "big-pickle",

@@ -35,7 +32,6 @@ const OPENCODE_FREE_MODEL_IDS = new Set([
]);
const HIDDEN_MODEL_PROVIDERS = new Set([
  "claude-code",
  "google",
  "google-vertex",
  "groq",
  "github-copilot",

@@ -266,7 +262,7 @@ function resolveAutoBenchmarkPickForUnit(unitType, prefs) {
  const candidates = [];
  for (const provider of getProviders()) {
    if (!isProviderAllowedByLists(provider, allowed, blocked)) continue;
-    if (!getEnvApiKey(provider)) continue;
+    if (!getConfiguredEnvApiKey(provider)) continue;
    for (const model of getModels(provider)) {
      if (
        !isProviderModelAllowed(

@@ -296,7 +292,12 @@ function resolveAutoBenchmarkPickForUnit(unitType, prefs) {
    });
    if (!picked) return undefined;
    return { primary: picked.primary, fallbacks: picked.fallbacks };
-  } catch {
+  } catch (err) {
    if (process.env.SF_DEBUG_PREFERENCES_MODELS === "1") {
      console.warn(
        `preferences-models auto benchmark failed: ${err instanceof Error ? err.stack || err.message : String(err)}`,
      );
    }
    return undefined;
  }
}
src/resources/extensions/sf/provider-env-auth.js (new file, 73 lines)

@@ -0,0 +1,73 @@
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { getEnvApiKey } from "@singularity-forge/pi-ai";
import {
  getAgentDir,
  SettingsManager,
} from "@singularity-forge/pi-coding-agent";

const GOOGLE_ENV_AUTH_DEFAULT_OFF_PROVIDERS = new Set([
  "google",
  "google-gemini-cli",
]);

function readJson(path) {
  try {
    if (!existsSync(path)) return {};
    return JSON.parse(readFileSync(path, "utf-8"));
  } catch {
    return {};
  }
}

function readProviderEnvAuthSettings(cwd, agentDir) {
  const globalSettings = readJson(join(agentDir, "settings.json"));
  const projectSettings = readJson(join(cwd, ".sf", "settings.json"));
  return {
    ...(globalSettings.providerEnvAuth ?? {}),
    ...(projectSettings.providerEnvAuth ?? {}),
    providers: {
      ...(globalSettings.providerEnvAuth?.providers ?? {}),
      ...(projectSettings.providerEnvAuth?.providers ?? {}),
    },
  };
}

function getProviderEnvAuthMode(providerId, cwd) {
  const agentDir = getAgentDir();
  const settingsManager = SettingsManager.create(cwd, agentDir);
  if (typeof settingsManager.getProviderEnvAuthMode === "function") {
    return settingsManager.getProviderEnvAuthMode(providerId);
  }
  const settings = readProviderEnvAuthSettings(cwd, agentDir);
  return (
    settings.providers?.[providerId] ??
    settings.default ??
    (GOOGLE_ENV_AUTH_DEFAULT_OFF_PROVIDERS.has(providerId) ? "off" : "auto")
  );
}

function getProviderEnvKey(providerId) {
  const apiKey = getEnvApiKey(providerId);
  if (apiKey) return apiKey;
  if (providerId === "google") {
    return (
      process.env.GEMINI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY
    );
  }
  return undefined;
}

/**
 * Return the provider env API key only when Forge settings allow env auth.
 *
 * Purpose: keep SF extension-side provider heuristics aligned with the core
 * providerEnvAuth policy so ambient env keys do not bypass settings.json.
 *
 * Consumer: doctor-providers.js and preferences-models.js when checking whether
 * a provider is available from environment credentials.
 */
export function getConfiguredEnvApiKey(providerId, cwd = process.cwd()) {
  if (getProviderEnvAuthMode(providerId, cwd) === "off") return undefined;
  return getProviderEnvKey(providerId);
}
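A minimal sketch of the settings shape this module reads on its fallback path (when `SettingsManager.getProviderEnvAuthMode` is unavailable); the values are illustrative only:

```js
// Illustrative .sf/settings.json fragment. Project settings override global
// ones key-by-key, and google/google-gemini-cli default to "off" when unset.
const exampleSettings = {
  providerEnvAuth: {
    default: "auto",             // applies when a provider has no explicit mode
    providers: { google: "on" }, // re-enables env auth for google in this repo
  },
};
// With this in place, getConfiguredEnvApiKey("google") would return
// GEMINI_API_KEY (or GOOGLE_GENERATIVE_AI_API_KEY) instead of undefined.
```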
src/resources/extensions/sf/retrieval-evidence.js (new file, 66 lines)

@@ -0,0 +1,66 @@
/**
 * retrieval-evidence.js — DB-backed retrieval provenance helpers.
 *
 * Purpose: give local code, docs, and web retrieval tools one audit contract so
 * agents can distinguish live source evidence from stale or external context.
 *
 * Consumer: `sift_search`, `codebase_search`, and future Context7/web bridges.
 */
import { execFileSync } from "node:child_process";

function readGitValue(projectRoot, args) {
  try {
    return execFileSync("git", args, {
      cwd: projectRoot,
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 2_000,
    }).trim();
  } catch {
    return "";
  }
}

/**
 * Build best-effort git provenance for a retrieval event.
 * Purpose: label whether a context hit came from clean HEAD, a dirty worktree,
 * or an unknown non-git directory before it is trusted for planning.
 * Consumer: retrieval evidence writers.
 */
export function buildRetrievalProvenance(projectRoot) {
  const gitHead = readGitValue(projectRoot, ["rev-parse", "HEAD"]) || null;
  const gitBranch =
    readGitValue(projectRoot, ["branch", "--show-current"]) ||
    readGitValue(projectRoot, ["rev-parse", "--abbrev-ref", "HEAD"]) ||
    null;
  const status = readGitValue(projectRoot, ["status", "--porcelain"]);
  return {
    gitHead,
    gitBranch,
    worktreeDirty: status.length > 0,
    freshness: gitHead
      ? status.length > 0
        ? "working-tree"
        : "git-head"
      : "unknown",
  };
}

/**
 * Record retrieval evidence without making retrieval depend on DB availability.
 * Purpose: preserve provenance when the SF DB is open while letting search tools
 * degrade normally in standalone or early-startup contexts.
 * Consumer: local retrieval tool implementations.
 */
export async function recordRetrievalEvidence(projectRoot, entry) {
  try {
    const { insertRetrievalEvidence } = await import("./sf-db.js");
    insertRetrievalEvidence({
      projectRoot,
      ...buildRetrievalProvenance(projectRoot),
      ...entry,
    });
  } catch {
    // Retrieval evidence is best-effort; search results must remain usable.
  }
}
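For a repo with uncommitted edits, `buildRetrievalProvenance` yields roughly the shape below; the hash and branch values are placeholders, not real output:

```js
// Sketch of the provenance object for a dirty worktree (values illustrative):
// {
//   gitHead: "abc123…",        // current HEAD commit, null outside a git repo
//   gitBranch: "main",
//   worktreeDirty: true,       // `git status --porcelain` was non-empty
//   freshness: "working-tree", // "git-head" when clean, "unknown" without git
// }
```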
@@ -78,7 +78,7 @@ function openRawDb(path) {
  loadProvider();
  return new DatabaseSync(path);
}
-const SCHEMA_VERSION = 40;
+const SCHEMA_VERSION = 41;
function indexExists(db, name) {
  return !!db
    .prepare(

@@ -319,6 +319,39 @@ function ensureSelfFeedbackTables(db) {
    "CREATE INDEX IF NOT EXISTS idx_self_feedback_kind ON self_feedback(kind, ts)",
  );
}
function ensureRetrievalEvidenceTables(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS retrieval_evidence (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      backend TEXT NOT NULL,
      source_kind TEXT NOT NULL DEFAULT 'code',
      query TEXT NOT NULL DEFAULT '',
      strategy TEXT NOT NULL DEFAULT '',
      scope TEXT NOT NULL DEFAULT '',
      project_root TEXT NOT NULL DEFAULT '',
      git_head TEXT DEFAULT NULL,
      git_branch TEXT DEFAULT NULL,
      worktree_dirty INTEGER NOT NULL DEFAULT 0,
      freshness TEXT NOT NULL DEFAULT 'unknown',
      status TEXT NOT NULL DEFAULT 'ok',
      hit_count INTEGER NOT NULL DEFAULT 0,
      elapsed_ms INTEGER NOT NULL DEFAULT 0,
      cache_path TEXT DEFAULT NULL,
      error TEXT DEFAULT NULL,
      result_json TEXT NOT NULL DEFAULT '{}',
      recorded_at TEXT NOT NULL
    )
  `);
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_backend_recorded ON retrieval_evidence(backend, recorded_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_scope_recorded ON retrieval_evidence(scope, recorded_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_retrieval_evidence_status_recorded ON retrieval_evidence(status, recorded_at DESC)",
  );
}
function ensureSpecSchemaTables(db) {
  // Tier 1.3: Spec/Runtime/Evidence schema separation
  // Creates 9 normalized tables for milestone, slice, task entities

@@ -867,6 +900,7 @@ function initSchema(db, fileBacked) {
  `);
  ensureSelfFeedbackTables(db);
  ensureSolverEvalTables(db);
  ensureRetrievalEvidenceTables(db);
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_memories_active ON memories(superseded_by)",
  );

@@ -940,6 +974,7 @@ function initSchema(db, fileBacked) {
  ensureHeadlessRunTables(db);
  ensureUokMessageTables(db);
  ensureSpecSchemaTables(db);
  ensureRetrievalEvidenceTables(db);
  db.exec(
    `CREATE VIEW IF NOT EXISTS active_decisions AS SELECT * FROM decisions WHERE superseded_by IS NULL`,
  );

@@ -2106,6 +2141,15 @@ function migrateSchema(db) {
      ":applied_at": new Date().toISOString(),
    });
  }
  if (currentVersion < 41) {
    ensureRetrievalEvidenceTables(db);
    db.prepare(
      "INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
    ).run({
      ":version": 41,
      ":applied_at": new Date().toISOString(),
    });
  }
  db.exec("COMMIT");
} catch (err) {
  db.exec("ROLLBACK");

@@ -6208,6 +6252,73 @@ export function getJudgmentsForUnit(unitIdPrefix, limit = 1000) {
    return [];
  }
}
// ─── Retrieval Evidence ─────────────────────────────────────────────────────

/**
 * Record a retrieval lookup with source provenance.
 * Purpose: let SF compare live code, semantic, docs, and web context by the same
 * freshness and scope contract before planning or implementation trusts it.
 * Consumer: Sift/codebase search tools and future Context7/web retrieval bridges.
 */
export function insertRetrievalEvidence(args) {
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  const now = args.recordedAt ?? new Date().toISOString();
  currentDb
    .prepare(`INSERT INTO retrieval_evidence (
      backend, source_kind, query, strategy, scope, project_root,
      git_head, git_branch, worktree_dirty, freshness, status,
      hit_count, elapsed_ms, cache_path, error, result_json, recorded_at
    ) VALUES (
      :backend, :source_kind, :query, :strategy, :scope, :project_root,
      :git_head, :git_branch, :worktree_dirty, :freshness, :status,
      :hit_count, :elapsed_ms, :cache_path, :error, :result_json, :recorded_at
    )`)
    .run({
      ":backend": args.backend,
      ":source_kind": args.sourceKind ?? "code",
      ":query": args.query ?? "",
      ":strategy": args.strategy ?? "",
      ":scope": args.scope ?? "",
      ":project_root": args.projectRoot ?? "",
      ":git_head": args.gitHead ?? null,
      ":git_branch": args.gitBranch ?? null,
      ":worktree_dirty": intBool(args.worktreeDirty),
      ":freshness": args.freshness ?? "unknown",
      ":status": args.status ?? "ok",
      ":hit_count": args.hitCount ?? 0,
      ":elapsed_ms": args.elapsedMs ?? 0,
      ":cache_path": args.cachePath ?? null,
      ":error": args.error ?? null,
      ":result_json": JSON.stringify(args.result ?? {}),
      ":recorded_at": now,
    });
}

/**
 * Return recent retrieval evidence rows.
 * Purpose: support audits that need to distinguish live source evidence from
 * stale indexed or prose-only context.
 * Consumer: inspect/doctor tooling and tests for retrieval provenance.
 */
export function getRetrievalEvidence(limit = 100) {
  if (!currentDb) return [];
  const rows = currentDb
    .prepare(`SELECT
      id, backend, source_kind AS sourceKind, query, strategy, scope,
      project_root AS projectRoot, git_head AS gitHead,
      git_branch AS gitBranch, worktree_dirty AS worktreeDirty,
      freshness, status, hit_count AS hitCount, elapsed_ms AS elapsedMs,
      cache_path AS cachePath, error, result_json AS resultJson, recorded_at AS recordedAt
    FROM retrieval_evidence
    ORDER BY recorded_at DESC, id DESC
    LIMIT :limit`)
    .all({ ":limit": limit });
  return rows.map((row) => ({
    ...row,
    worktreeDirty: row.worktreeDirty === 1,
    result: parseJsonObject(row.resultJson, {}),
  }));
}
// ─── Memory Embeddings ───────────────────────────────────────────────────────
export function upsertMemoryEmbedding(args) {
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
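A minimal sketch of an audit pass over the accessors above, grouping recent rows by backend and freshness; the grouping key and function name are illustrative, not part of this commit:

```js
// Illustrative audit: count recent evidence rows per backend/freshness so a
// reviewer can see at a glance how much context came from live source.
import { getRetrievalEvidence } from "./sf-db.js";

function summarizeEvidence(limit = 100) {
  const counts = new Map();
  for (const row of getRetrievalEvidence(limit)) {
    const key = `${row.backend}/${row.freshness}`;
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  return Object.fromEntries(counts); // e.g. { "sift/working-tree": 12, ... }
}
```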
@@ -0,0 +1,124 @@
/**
 * context7-retrieval-evidence.test.mjs — Context7 provenance coverage.
 *
 * Purpose: prove external documentation lookups write DB evidence with backend,
 * scope, freshness, and result metadata before agents trust indexed docs.
 */
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import registerContext7Extension from "../../context7/index.js";
import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js";

const tmpRoots = [];
const originalCwd = process.cwd();
const originalFetch = globalThis.fetch;

afterEach(() => {
  process.chdir(originalCwd);
  closeDatabase();
  globalThis.fetch = originalFetch;
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});

function makeProject() {
  const root = mkdtempSync(join(tmpdir(), "sf-context7-evidence-"));
  tmpRoots.push(root);
  mkdirSync(join(root, ".sf"), { recursive: true });
  return root;
}

function captureContext7Tools() {
  const tools = new Map();
  registerContext7Extension({
    on() {},
    registerTool(definition) {
      tools.set(definition.name, definition);
    },
  });
  return tools;
}

test("resolve_library_when_successful_records_retrieval_evidence", async () => {
  const project = makeProject();
  process.chdir(project);
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  globalThis.fetch = async (url) => {
    assert.match(String(url), /\/libs\/search/);
    return {
      ok: true,
      json: async () => ({
        results: [
          {
            id: "/websites/example_docs",
            title: "Example Docs",
            trustScore: 9,
            benchmarkScore: 87.5,
          },
        ],
      }),
    };
  };

  const tools = captureContext7Tools();
  const result = await tools.get("resolve_library").execute("call-1", {
    libraryName: "example-docs-evidence",
    query: "routing",
  });

  assert.equal(result.details.resultCount, 1);
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "context7");
  assert.equal(rows[0].sourceKind, "docs");
  assert.equal(rows[0].strategy, "library-search");
  assert.equal(rows[0].scope, "example-docs-evidence");
  assert.equal(rows[0].freshness, "external-index");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.deepEqual(rows[0].result.libraries, [
    {
      id: "/websites/example_docs",
      title: "Example Docs",
      trustScore: 9,
      benchmarkScore: 87.5,
    },
  ]);
});

test("get_library_docs_when_successful_records_retrieval_evidence", async () => {
  const project = makeProject();
  process.chdir(project);
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  globalThis.fetch = async (url) => {
    assert.match(String(url), /\/context/);
    return {
      ok: true,
      text: async () => "Example docs about routing.",
    };
  };

  const tools = captureContext7Tools();
  const result = await tools.get("get_library_docs").execute("call-1", {
    libraryId: "/websites/example_docs",
    query: "routing",
    tokens: 500,
  });

  assert.equal(result.details.charCount, "Example docs about routing.".length);
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "context7");
  assert.equal(rows[0].sourceKind, "docs");
  assert.equal(rows[0].strategy, "docs-fetch");
  assert.equal(rows[0].scope, "/websites/example_docs");
  assert.equal(rows[0].freshness, "external-index");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.equal(rows[0].result.tokens, 500);
  assert.equal(rows[0].result.charCount, "Example docs about routing.".length);
});
@@ -24,6 +24,7 @@ function makePreferencesProject(globalPreferences) {
  const home = join(root, "home");
  const project = join(root, "project");
  mkdirSync(home, { recursive: true });
  mkdirSync(join(home, ".sf", "agent"), { recursive: true });
  mkdirSync(join(project, ".sf"), { recursive: true });
  writeFileSync(join(home, "preferences.md"), globalPreferences, "utf-8");
  writeFileSync(

@@ -32,6 +33,7 @@ function makePreferencesProject(globalPreferences) {
    "utf-8",
  );
  process.env.SF_HOME = home;
  process.env.HOME = home;
  process.chdir(project);
  return project;
}

@@ -91,4 +93,59 @@ describe("doctor provider checks", () => {
    assert.equal(telegram?.status, "unconfigured");
    assert.equal(telegram?.required, false);
  });

  test("runProviderChecks_when_google_env_auth_is_default_off_treats_google_as_missing_required_route", () => {
    makePreferencesProject(
      [
        "---",
        "version: 1",
        "models:",
        " planning: google/gemini-2.5-pro",
        "---",
        "",
      ].join("\n"),
    );
    process.env.GEMINI_API_KEY = "test-google-key";

    const results = runProviderChecks();
    const google = results.find((result) => result.name === "google");

    assert.equal(google?.status, "error");
  });

  test("runProviderChecks_when_google_env_auth_is_enabled_accepts_google_env_key", () => {
    const project = makePreferencesProject(
      [
        "---",
        "version: 1",
        "models:",
        " planning: google/gemini-2.5-pro",
        "---",
        "",
      ].join("\n"),
    );
    mkdirSync(join(project, ".sf"), { recursive: true });
    writeFileSync(
      join(project, ".sf", "settings.json"),
      JSON.stringify(
        {
          providerEnvAuth: {
            providers: {
              google: "on",
            },
          },
        },
        null,
        2,
      ),
      "utf-8",
    );
    process.env.GEMINI_API_KEY = "test-google-key";

    const results = runProviderChecks();
    const google = results.find((result) => result.name === "google");

    assert.equal(google?.status, "ok");
    assert.equal(google?.source, "env");
  });
});
@@ -0,0 +1,93 @@
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import { resolveModelWithFallbacksForUnit } from "../preferences-models.js";
import { getConfiguredEnvApiKey } from "../provider-env-auth.js";

const originalCwd = process.cwd();
const originalEnv = { ...process.env };
const tmpDirs = [];

afterEach(() => {
  process.chdir(originalCwd);
  process.env = { ...originalEnv };
  while (tmpDirs.length > 0) {
    rmSync(tmpDirs.pop(), { recursive: true, force: true });
  }
});

function makePreferencesProject(projectPreferences, projectSettings) {
  const root = mkdtempSync(join(tmpdir(), "sf-preferences-models-"));
  tmpDirs.push(root);
  const home = join(root, "home");
  const project = join(root, "project");
  mkdirSync(join(home, ".sf", "agent"), { recursive: true });
  mkdirSync(join(project, ".sf"), { recursive: true });
  writeFileSync(
    join(project, ".sf", "PREFERENCES.md"),
    projectPreferences,
    "utf-8",
  );
  if (projectSettings) {
    writeFileSync(
      join(project, ".sf", "settings.json"),
      JSON.stringify(projectSettings, null, 2),
      "utf-8",
    );
  }
  process.env.HOME = home;
  process.chdir(project);
  return project;
}

describe("preferences model resolution", () => {
  test("resolveModelWithFallbacksForUnit_when_google_env_auth_is_default_off_skips_google_auto_benchmark_candidates", () => {
    makePreferencesProject(
      [
        "---",
        "version: 1",
        "allowed_providers:",
        " - google",
        "models: {}",
        "---",
        "",
      ].join("\n"),
    );
    process.env.GEMINI_API_KEY = "test-google-key";

    const result = resolveModelWithFallbacksForUnit("plan-milestone");

    assert.equal(result, undefined);
  });

  test("resolveModelWithFallbacksForUnit_when_google_env_auth_is_enabled_uses_google_auto_benchmark_candidates", () => {
    makePreferencesProject(
      [
        "---",
        "version: 1",
        "allowed_providers:",
        " - google",
        "models: {}",
        "---",
        "",
      ].join("\n"),
      {
        providerEnvAuth: {
          providers: {
            google: "on",
          },
        },
      },
    );
    process.env.GEMINI_API_KEY = "test-google-key";

    assert.equal(getConfiguredEnvApiKey("google"), "test-google-key");
    const result = resolveModelWithFallbacksForUnit("plan-milestone");

    assert.ok(result);
    assert.equal(typeof result.primary, "string");
    assert.ok(result.primary.length > 0);
  });
});
@@ -14,9 +14,11 @@ import {
  closeDatabase,
  getDatabase,
  getJudgmentsForUnit,
  getRetrievalEvidence,
  getScheduleEntries,
  insertGateRun,
  insertJudgment,
  insertRetrievalEvidence,
  insertScheduleEntry,
  openDatabase,
} from "../sf-db.js";

@@ -203,7 +205,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
  const version = db
    .prepare("SELECT MAX(version) AS version FROM schema_version")
    .get();
-  assert.equal(version.version, 40);
+  assert.equal(version.version, 41);
  const taskSpec = db
    .prepare(
      "SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",

@@ -343,3 +345,37 @@ test("openDatabase_judgments_table_round_trip", () => {
  assert.equal(t01.length, 1);
  assert.equal(t01[0].confidence, "high");
});

test("openDatabase_retrieval_evidence_table_round_trip", () => {
  assert.equal(openDatabase(":memory:"), true);
  insertRetrievalEvidence({
    backend: "sift",
    sourceKind: "code",
    query: "approval policy",
    strategy: "bm25",
    scope: "src",
    projectRoot: "/repo",
    gitHead: "abc123",
    gitBranch: "main",
    worktreeDirty: true,
    freshness: "working-tree",
    status: "ok",
    hitCount: 1,
    elapsedMs: 42,
    cachePath: "/repo/.sf/runtime/sift/search-cache",
    result: { hits: [{ path: "src/index.ts", score: 0.9 }] },
    recordedAt: "2026-05-07T00:00:00.000Z",
  });

  const rows = getRetrievalEvidence(10);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "sift");
  assert.equal(rows[0].sourceKind, "code");
  assert.equal(rows[0].query, "approval policy");
  assert.equal(rows[0].scope, "src");
  assert.equal(rows[0].worktreeDirty, true);
  assert.equal(rows[0].freshness, "working-tree");
  assert.deepEqual(rows[0].result, {
    hits: [{ path: "src/index.ts", score: 0.9 }],
  });
});
@@ -0,0 +1,168 @@
/**
 * sift-retrieval-evidence.test.mjs — Sift retrieval provenance coverage.
 *
 * Purpose: prove live code searches record DB evidence with backend, scope,
 * freshness, and result metadata so context tools can be audited consistently.
 */
import assert from "node:assert/strict";
import {
  chmodSync,
  mkdirSync,
  mkdtempSync,
  rmSync,
  writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import registerSubagentExtension from "../../subagent/index.js";
import { registerQueryTools } from "../bootstrap/query-tools.js";
import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js";
import { registerSiftSearchTool } from "../tools/sift-search-tool.js";

const tmpRoots = [];
const originalCwd = process.cwd();
const originalSiftPath = process.env.SIFT_PATH;

afterEach(() => {
  process.chdir(originalCwd);
  closeDatabase();
  if (originalSiftPath === undefined) {
    delete process.env.SIFT_PATH;
  } else {
    process.env.SIFT_PATH = originalSiftPath;
  }
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});

function makeProject() {
  const root = mkdtempSync(join(tmpdir(), "sf-sift-evidence-"));
  tmpRoots.push(root);
  mkdirSync(join(root, ".sf"), { recursive: true });
  mkdirSync(join(root, "src"), { recursive: true });
  writeFileSync(join(root, "src", "index.js"), "export const value = 1;\n");
  return root;
}

function makeFakeSift() {
  const dir = mkdtempSync(join(tmpdir(), "sf-sift-bin-"));
  tmpRoots.push(dir);
  const bin = join(dir, "sift");
  writeFileSync(
    bin,
    `#!/bin/sh
printf '{"query":"approval policy","strategy":"bm25","hits":[{"path":"src/index.js","score":0.91,"content":"approval policy contract","line_start":1,"line_end":1}]}'`,
  );
  chmodSync(bin, 0o755);
  return bin;
}

function captureTool() {
  let tool = null;
  registerSiftSearchTool({
    registerTool(definition) {
      tool = definition;
    },
  });
  assert.ok(tool, "sift_search tool should register");
  return tool;
}

function captureQueryTool(name) {
  const tools = new Map();
  registerQueryTools({
    registerTool(definition) {
      tools.set(definition.name, definition);
    },
  });
  const tool = tools.get(name);
  assert.ok(tool, `${name} tool should register`);
  return tool;
}

function captureSubagentTool(name) {
  const tools = new Map();
  registerSubagentExtension({
    on() {},
    registerCommand() {},
    registerTool(definition) {
      tools.set(definition.name, definition);
    },
  });
  const tool = tools.get(name);
  assert.ok(tool, `${name} tool should register`);
  return tool;
}

test("sift_search_when_successful_records_retrieval_evidence", async () => {
  const project = makeProject();
  process.chdir(project);
  process.env.SIFT_PATH = makeFakeSift();
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);

  const tool = captureTool();
  const result = await tool.execute("call-1", {
    query: "approval policy",
    path: "src",
    strategy: "bm25",
    limit: 5,
  });

  assert.equal(result.isError, undefined);
  assert.equal(result.details.hitCount, 1);
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "sift");
  assert.equal(rows[0].sourceKind, "code");
  assert.equal(rows[0].query, "approval policy");
  assert.equal(rows[0].strategy, "bm25");
  assert.equal(rows[0].scope, "src");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.equal(rows[0].projectRoot, project);
  assert.match(rows[0].cachePath, /\.sf\/runtime\/sift\/search-cache$/);
  assert.deepEqual(rows[0].result.hits, [
    {
      path: "src/index.js",
      score: 0.91,
      lineStart: 1,
      lineEnd: 1,
    },
  ]);

  const queryTool = captureQueryTool("sf_retrieval_evidence");
  const queryResult = await queryTool.execute("call-2", { limit: 1 });
  assert.match(queryResult.content[0].text, /Retrieval evidence: 1 row/);
  assert.equal(queryResult.details.rows[0].backend, "sift");
  assert.equal(queryResult.details.rows[0].query, "approval policy");
});

test("codebase_search_when_successful_records_retrieval_evidence", async () => {
  const project = makeProject();
  process.chdir(project);
  process.env.SIFT_PATH = makeFakeSift();
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);

  const tool = captureSubagentTool("codebase_search");
  const result = await tool.execute("call-1", {
    query: "approval policy",
    scope: "src",
    strategy: "path-hybrid",
    timeoutMs: 10_000,
  });

  assert.equal(result.details.operation, "codebase_search");
  assert.equal(result.details.exitCode, 0);
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "codebase_search");
  assert.equal(rows[0].sourceKind, "code");
  assert.equal(rows[0].query, "approval policy");
  assert.equal(rows[0].strategy, "path-hybrid");
  assert.equal(rows[0].scope, "src");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.match(rows[0].result.outputPreview, /approval policy contract/);
});
@@ -0,0 +1,136 @@
/**
 * web-search-retrieval-evidence.test.mjs — web search provenance coverage.
 *
 * Purpose: prove current/external fact lookups write DB evidence before agents
 * use web results as planning input.
 */
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import {
  registerSearchTool,
  resetSearchLoopGuardState,
} from "../../search-the-web/tool-search.js";
import { closeDatabase, getRetrievalEvidence, openDatabase } from "../sf-db.js";

const tmpRoots = [];
const originalCwd = process.cwd();
const originalFetch = globalThis.fetch;
const originalBraveKey = process.env.BRAVE_API_KEY;
const originalTavilyKey = process.env.TAVILY_API_KEY;
const originalMiniMaxCodePlanKey = process.env.MINIMAX_CODE_PLAN_KEY;
const originalMiniMaxCodingKey = process.env.MINIMAX_CODING_API_KEY;
const originalMiniMaxKey = process.env.MINIMAX_API_KEY;
const originalSerperKey = process.env.SERPER_API_KEY;
const originalExaKey = process.env.EXA_API_KEY;
const originalOllamaKey = process.env.OLLAMA_API_KEY;

afterEach(() => {
  process.chdir(originalCwd);
  closeDatabase();
  globalThis.fetch = originalFetch;
  if (originalBraveKey === undefined) {
    delete process.env.BRAVE_API_KEY;
  } else {
    process.env.BRAVE_API_KEY = originalBraveKey;
  }
  restoreEnv("TAVILY_API_KEY", originalTavilyKey);
  restoreEnv("MINIMAX_CODE_PLAN_KEY", originalMiniMaxCodePlanKey);
  restoreEnv("MINIMAX_CODING_API_KEY", originalMiniMaxCodingKey);
  restoreEnv("MINIMAX_API_KEY", originalMiniMaxKey);
  restoreEnv("SERPER_API_KEY", originalSerperKey);
  restoreEnv("EXA_API_KEY", originalExaKey);
  restoreEnv("OLLAMA_API_KEY", originalOllamaKey);
  resetSearchLoopGuardState();
  for (const root of tmpRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});

function restoreEnv(key, value) {
  if (value === undefined) {
    delete process.env[key];
  } else {
    process.env[key] = value;
  }
}

function makeProject() {
  const root = mkdtempSync(join(tmpdir(), "sf-web-search-evidence-"));
  tmpRoots.push(root);
  mkdirSync(join(root, ".sf"), { recursive: true });
  return root;
}

function captureSearchTool() {
  let tool = null;
  registerSearchTool({
    registerTool(definition) {
      tool = definition;
    },
    writeTempFile: async () => "/tmp/not-used",
  });
  assert.ok(tool, "search-the-web tool should register");
  return tool;
}

test("search_the_web_when_successful_records_retrieval_evidence", async () => {
  const project = makeProject();
  process.chdir(project);
  process.env.BRAVE_API_KEY = "test-brave-key";
  process.env.TAVILY_API_KEY = "";
  process.env.MINIMAX_CODE_PLAN_KEY = "";
  process.env.MINIMAX_CODING_API_KEY = "";
  process.env.MINIMAX_API_KEY = "";
  process.env.SERPER_API_KEY = "";
  process.env.EXA_API_KEY = "";
  process.env.OLLAMA_API_KEY = "";
  assert.equal(openDatabase(join(project, ".sf", "sf.db")), true);
  globalThis.fetch = async (url) => {
    assert.match(String(url), /api\.search\.brave\.com/);
    return {
      ok: true,
      headers: { get: () => null },
      json: async () => ({
        web: {
          results: [
            {
              title: "Example Result",
              url: "https://example.com/result",
              description: "A result for testing.",
            },
          ],
        },
        query: {
          original: "example query",
          altered: "example query",
        },
      }),
    };
  };

  const tool = captureSearchTool();
  const result = await tool.execute("call-1", {
    query: "example query",
    count: 1,
  });

  assert.equal(result.details.provider, "brave");
  assert.equal(result.details.count, 1);
  const rows = getRetrievalEvidence(5);
  assert.equal(rows.length, 1);
  assert.equal(rows[0].backend, "search-the-web");
  assert.equal(rows[0].sourceKind, "web");
  assert.equal(rows[0].strategy, "brave");
  assert.equal(rows[0].freshness, "external-live");
  assert.equal(rows[0].status, "ok");
  assert.equal(rows[0].hitCount, 1);
  assert.deepEqual(rows[0].result.results, [
    {
      title: "Example Result",
      url: "https://example.com/result",
    },
  ]);
});
@@ -17,6 +17,7 @@ import {
  resolveSiftBinary,
  resolveSiftSearchScope,
} from "../code-intelligence.js";
import { recordRetrievalEvidence } from "../retrieval-evidence.js";

const _KNOWN_STRATEGIES = [
  "hybrid",

@@ -263,20 +264,45 @@ export function registerSiftSearchTool(pi) {
      const elapsedMs = Date.now() - startedAt;
      const result = parseSiftOutput(stdout, stderr);
      const runtimeDirs = ensureSiftRuntimeDirs(projectRoot);

-      // Telemetry: log query outcomes for tuning
-      const { logInfo } = await import("../workflow-logger.js");
-      logInfo("sift_search", {
+      await recordRetrievalEvidence(projectRoot, {
        backend: "sift",
        sourceKind: "code",
        query: params.query,
        strategy: params.strategy ?? DEFAULT_STRATEGY,
-        agent: params.agent ?? false,
-        path: scope,
+        scope,
        status: "ok",
        hitCount: result.hits.length,
        elapsedMs,
-        binary: binaryPath,
-        searchCache: runtimeDirs.searchCache,
+        cachePath: runtimeDirs.searchCache,
        result: {
          hits: result.hits.map((hit) => ({
            path: hit.path,
            score: hit.score,
            lineStart: hit.lineStart,
            lineEnd: hit.lineEnd,
          })),
          agent: params.agent ?? false,
          agentMode: params.agentMode ?? null,
          plannerStrategy: params.plannerStrategy ?? null,
        },
      });

      try {
        const { debugLog } = await import("../debug-logger.js");
        debugLog("sift_search", {
          query: params.query,
          strategy: params.strategy ?? DEFAULT_STRATEGY,
          agent: params.agent ?? false,
          path: scope,
          hitCount: result.hits.length,
          elapsedMs,
          binary: binaryPath,
          searchCache: runtimeDirs.searchCache,
        });
      } catch {
        // Telemetry must not change search semantics.
      }

      const lines = [
        `Sift search: "${params.query}"`,
        `Strategy: ${params.strategy ?? DEFAULT_STRATEGY}${params.agent ? ` | agent: ${params.agentMode ?? "linear"} | planner: ${params.plannerStrategy ?? "heuristic"}` : ""}`,

@@ -319,6 +345,17 @@ export function registerSiftSearchTool(pi) {
    } catch (err) {
      const elapsedMs = Date.now() - startedAt;
      const message = err instanceof Error ? err.message : String(err);
      await recordRetrievalEvidence(projectRoot, {
        backend: "sift",
        sourceKind: "code",
        query: params.query,
        strategy: params.strategy ?? DEFAULT_STRATEGY,
        scope,
        status: "error",
        hitCount: 0,
        elapsedMs,
        error: message,
      });
      return {
        content: [
          {
@@ -29,6 +29,7 @@ import {
  resolveSiftSearchScope,
} from "../sf/code-intelligence.js";
import { loadEffectiveSFPreferences } from "../sf/preferences.js";
import { recordRetrievalEvidence } from "../sf/retrieval-evidence.js";
import { formatTokenCount } from "../shared/mod.js";
import { getCurrentPhase } from "../shared/sf-phase-state.js";
import { discoverAgents } from "./agents.js";

@@ -2281,6 +2282,7 @@ export default function (pi) {
      const scope = resolveSiftSearchScope(projectRoot, params.scope);
      const strategy = params.strategy ?? "page-index-hybrid";
      const query = params.query;
      const startedAt = Date.now();
      const timeoutMs =
        typeof params.timeoutMs === "number" &&
        Number.isFinite(params.timeoutMs)

@@ -2288,6 +2290,17 @@ export default function (pi) {
          : CODEBASE_SEARCH_TIMEOUT_MS;
      const siftBin = resolveSiftBinary();
      if (!siftBin) {
        await recordRetrievalEvidence(projectRoot, {
          backend: "codebase_search",
          sourceKind: "code",
          query,
          strategy,
          scope,
          status: "error",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          error: "sift binary not found",
        });
        return {
          content: [
            {

@@ -2367,6 +2380,22 @@ export default function (pi) {
        const text = timedOut
          ? `Code search timed out after ${Math.round(timeoutMs / 1000)}s. Narrow the query or scope and retry.`
          : "Code search aborted.";
        await recordRetrievalEvidence(projectRoot, {
          backend: "codebase_search",
          sourceKind: "code",
          query,
          strategy,
          scope,
          status: timedOut ? "timeout" : "aborted",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          cachePath: runtimeDirs.searchCache,
          error: text,
          result: {
            siftBin,
            timeoutMs,
          },
        });
        return {
          content: [
            {

@@ -2396,6 +2425,23 @@ export default function (pi) {
            : err
              ? `\n\nsift stderr: ${err.slice(0, 500)}`
              : "";
        await recordRetrievalEvidence(projectRoot, {
          backend: "codebase_search",
          sourceKind: "code",
          query,
          strategy,
          scope,
          status: "error",
          hitCount: 0,
          elapsedMs: Date.now() - startedAt,
          cachePath: runtimeDirs.searchCache,
          error: err || `exit ${exitCode}`,
          result: {
            siftBin,
            exitCode,
            timeoutMs,
          },
        });
        return {
          content: [
            {

@@ -2415,6 +2461,24 @@ export default function (pi) {
        },
      };
    }
    await recordRetrievalEvidence(projectRoot, {
      backend: "codebase_search",
      sourceKind: "code",
      query,
      strategy,
      scope,
      status: exitCode === 0 ? "ok" : "partial",
      hitCount: out.trim() ? 1 : 0,
      elapsedMs: Date.now() - startedAt,
      cachePath: runtimeDirs.searchCache,
      error: err || null,
      result: {
        siftBin,
        exitCode,
        timeoutMs,
        outputPreview: out.slice(0, 2_000),
      },
    });
    return {
      content: [
        {