feat: improve sf runtime self-reload and safeguards
This commit is contained in:
parent
c5e9e4f9c8
commit
e4c951ff0c
44 changed files with 3411 additions and 146 deletions
43
docs/plans/uok-droid-accounting-integration.md
Normal file
43
docs/plans/uok-droid-accounting-integration.md
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# UOK Droid Accounting Integration
|
||||
|
||||
## Purpose
|
||||
|
||||
Capture the useful Droid mission-accounting patterns as SF-native UOK contracts without importing Droid's model configuration, file-first state model, or mission runtime shape.
|
||||
|
||||
## Adopted Value
|
||||
|
||||
- Role-based model policy: durable intent is `auto` plus symbolic constraints, never concrete provider/model IDs.
|
||||
- Model route evidence: runtime-selected provider/model is recorded as evidence after SF's existing auto selector runs.
|
||||
- Secret-safe model route snapshots: reproducibility metadata is retained while API keys, headers, and tokens are redacted.
|
||||
- Typed progress events: stable machine event names such as `unit_selected`, `model_auto_resolved`, `unit_completed`, and `unit_blocked`.
|
||||
- Structured unit handoff: closeout fields for changed files, tests, commands, failures, leftover work, verification status, and fulfilled assertions.
|
||||
- Assertion coverage: compare required validation assertions with fulfilled handoff assertions.
|
||||
- Worker lineage: track worker session IDs, current/completed/failed session IDs, and lifecycle events.
|
||||
- Tool command registry: worker-visible command allowlists are explicit; Droid-style `services` is treated only as a legacy daemon-service alias.
|
||||
|
||||
## Live Now
|
||||
|
||||
- `model_auto_resolved`, `unit_selected`, `unit_completed`, and `unit_blocked` progress events are emitted into the existing journal stream alongside legacy events.
|
||||
- `model-auto-resolved` UOK audit payloads include secret-safe route snapshots.
|
||||
- UOK outcome memory recording uses the real `createMemory({ ... })` API and records source unit metadata.
|
||||
- Focused tests cover the projection/accounting helpers and the UOK memory integration.
|
||||
|
||||
## Deliberately Deferred
|
||||
|
||||
- No DB migration yet. These are projections and contracts until real query/recovery needs prove first-class rows are worth it.
|
||||
- No hard closeout gate yet. `assessAssertionCoverage()` exists, but completion semantics should change only with a focused gate test.
|
||||
- No context-pack renderer yet. `buildWorkerContextPackProjection()` is a shape builder, not the prompt renderer.
|
||||
- No command enforcement yet. `normalizeToolCommandRegistry()` constrains advertised tools/commands; execution enforcement must wire through existing permission/run-control paths.
|
||||
- No new model router. SF's existing auto model selector remains the only route resolver.
|
||||
|
||||
## Next Integration Order
|
||||
|
||||
1. Persist worker lineage from real dispatch lifecycle events.
|
||||
2. Add assertion coverage as a closeout gate before marking a unit complete.
|
||||
3. Render worker context packs from existing SF DB/runtime state.
|
||||
4. Feed the tool command registry into worker prompts.
|
||||
5. Promote selected projection fields into SQLite only after dashboards/recovery flows need durable queries.
|
||||
|
||||
## Stop Rule
|
||||
|
||||
Do not add more schema helpers for this Droid accounting thread until an existing SF runtime path needs a concrete caller. The next work should be integration into existing UOK dispatch, closeout, prompt, or recovery flows.
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { test } from "vitest";
|
||||
|
||||
const source = readFileSync(
|
||||
join(
|
||||
process.cwd(),
|
||||
"packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts",
|
||||
),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
test("interactive_tui_autoreload_uses_existing_reload_path", () => {
|
||||
assert.match(
|
||||
source,
|
||||
/Purpose: make the TUI pick up SF's own code\/resource fixes/,
|
||||
);
|
||||
assert.match(source, /private startAutoReloadWatcher\(\): void/);
|
||||
assert.match(source, /private async checkAutoReload\(\): Promise<void>/);
|
||||
assert.match(source, /await this\.handleReloadCommand\(\)/);
|
||||
});
|
||||
|
||||
test("interactive_tui_runtime_reload_exits_for_launcher_restart", () => {
|
||||
const reloadStart = source.indexOf("private async handleReloadCommand()");
|
||||
assert.ok(reloadStart >= 0, "handleReloadCommand should exist");
|
||||
const reloadBody = source.slice(reloadStart, reloadStart + 1200);
|
||||
|
||||
assert.match(reloadBody, /computeInteractiveRuntimeFingerprint\(\)/);
|
||||
assert.match(reloadBody, /process\.exit\(INTERACTIVE_RELOAD_EXIT_CODE\)/);
|
||||
});
|
||||
|
|
@ -15,11 +15,12 @@ function renderTool(
|
|||
details?: Record<string, unknown>;
|
||||
},
|
||||
toolDefinition?: { label?: string },
|
||||
options: { startedAt?: number } = {},
|
||||
): string {
|
||||
const component = new ToolExecutionComponent(
|
||||
toolName,
|
||||
args,
|
||||
{},
|
||||
options,
|
||||
toolDefinition as any,
|
||||
{ requestRender() {} } as any,
|
||||
);
|
||||
|
|
@ -138,4 +139,17 @@ describe("ToolExecutionComponent", () => {
|
|||
assert.match(rendered, /Plan Milestone/);
|
||||
assert.doesNotMatch(rendered, /sf_plan_milestone/);
|
||||
});
|
||||
|
||||
test("tool frame header includes ISO minute timestamp", () => {
|
||||
const startedAt = new Date(2026, 4, 8, 20, 51, 0).getTime();
|
||||
const rendered = renderTool(
|
||||
"Read",
|
||||
{ path: "/tmp/demo.txt" },
|
||||
undefined,
|
||||
undefined,
|
||||
{ startedAt },
|
||||
);
|
||||
|
||||
assert.match(rendered, /2026-05-08 20:51/);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ export class AssistantMessageComponent extends Container {
|
|||
}
|
||||
}
|
||||
|
||||
if (message.stopReason && message.timestamp) {
|
||||
if (!hasToolContent && message.stopReason && message.timestamp) {
|
||||
const timeStr = formatTimestamp(
|
||||
message.timestamp,
|
||||
this.timestampFormat,
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import { getLanguageFromPath, highlightCode, theme } from "../theme/theme.js";
|
|||
import { shortenPath } from "../utils/shorten-path.js";
|
||||
import { renderDiff } from "./diff.js";
|
||||
import { keyHint } from "./keybinding-hints.js";
|
||||
import { formatTimestamp } from "./timestamp.js";
|
||||
import { truncateToVisualLines } from "./visual-truncate.js";
|
||||
|
||||
// Preview line limit for bash when not expanded
|
||||
|
|
@ -111,6 +112,7 @@ function renderToolFrame(
|
|||
status: string;
|
||||
tone: ToolFrameTone;
|
||||
modelLabel?: string;
|
||||
startedAt?: number;
|
||||
},
|
||||
): string[] {
|
||||
const outerWidth = Math.max(20, width);
|
||||
|
|
@ -130,6 +132,9 @@ function renderToolFrame(
|
|||
const leftStyled = theme.fg(labelColor, theme.bold(`• ${opts.label}`));
|
||||
const statusStyled = theme.fg(statusColor, opts.status);
|
||||
let rightStyled = statusStyled;
|
||||
if (opts.startedAt !== undefined) {
|
||||
rightStyled = `${theme.fg("dim", formatTimestamp(opts.startedAt, "date-time-iso"))}${theme.fg("dim", " · ")}${rightStyled}`;
|
||||
}
|
||||
if (opts.modelLabel) {
|
||||
const separatorStyled = theme.fg("dim", " · ");
|
||||
const modelWidth =
|
||||
|
|
@ -215,6 +220,7 @@ function formatCompactArgs(args: unknown, expanded: boolean): string {
|
|||
export interface ToolExecutionOptions {
|
||||
showImages?: boolean; // default: true (only used if terminal supports images)
|
||||
modelLabel?: string;
|
||||
startedAt?: number;
|
||||
}
|
||||
|
||||
type WriteHighlightCache = {
|
||||
|
|
@ -238,6 +244,7 @@ export class ToolExecutionComponent extends Container {
|
|||
private expanded = false;
|
||||
private showImages: boolean;
|
||||
private modelLabel?: string;
|
||||
private startedAt: number;
|
||||
private isPartial = true;
|
||||
private toolDefinition?: ToolDefinition;
|
||||
private ui: TUI;
|
||||
|
|
@ -283,6 +290,7 @@ export class ToolExecutionComponent extends Container {
|
|||
this.args = args;
|
||||
this.showImages = options.showImages ?? true;
|
||||
this.modelLabel = options.modelLabel?.trim() || undefined;
|
||||
this.startedAt = options.startedAt ?? Date.now();
|
||||
this.toolDefinition = toolDefinition;
|
||||
this.ui = ui;
|
||||
this.cwd = cwd;
|
||||
|
|
@ -619,6 +627,7 @@ export class ToolExecutionComponent extends Container {
|
|||
status: frameStatus,
|
||||
tone: frameTone,
|
||||
modelLabel: this.modelLabel,
|
||||
startedAt: this.startedAt,
|
||||
});
|
||||
return framed.length > 0 ? ["", ...framed] : framed;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -301,6 +301,7 @@ export async function handleAgentEvent(
|
|||
{
|
||||
showImages: host.settingsManager.getShowImages(),
|
||||
modelLabel: modelLabelFromAssistant(host.streamingMessage),
|
||||
startedAt: host.streamingMessage.timestamp,
|
||||
},
|
||||
host.getRegisteredToolDefinition(content.name),
|
||||
host.ui,
|
||||
|
|
@ -319,6 +320,7 @@ export async function handleAgentEvent(
|
|||
{
|
||||
showImages: host.settingsManager.getShowImages(),
|
||||
modelLabel: modelLabelFromAssistant(host.streamingMessage),
|
||||
startedAt: host.streamingMessage.timestamp,
|
||||
},
|
||||
undefined,
|
||||
host.ui,
|
||||
|
|
@ -767,6 +769,7 @@ export async function handleAgentEvent(
|
|||
{
|
||||
showImages: host.settingsManager.getShowImages(),
|
||||
modelLabel: modelLabelFromHost(host),
|
||||
startedAt: Date.now(),
|
||||
},
|
||||
host.getRegisteredToolDefinition(event.toolName),
|
||||
host.ui,
|
||||
|
|
|
|||
|
|
@ -198,6 +198,111 @@ function computeInteractiveRuntimeFingerprint(): string {
|
|||
return hash.digest("hex").slice(0, 16);
|
||||
}
|
||||
|
||||
const AUTO_RELOAD_INTERVAL_MS = 2_500;
|
||||
const AUTO_RELOAD_RESOURCE_EXTENSIONS = new Set([
|
||||
".cjs",
|
||||
".js",
|
||||
".json",
|
||||
".md",
|
||||
".mjs",
|
||||
".ts",
|
||||
".tsx",
|
||||
".yaml",
|
||||
".yml",
|
||||
]);
|
||||
const AUTO_RELOAD_IGNORED_DIRS = new Set([
|
||||
".git",
|
||||
"node_modules",
|
||||
"dist",
|
||||
"target",
|
||||
]);
|
||||
|
||||
/**
|
||||
* Collect reload-relevant files under a runtime resource path.
|
||||
*
|
||||
* Purpose: let the TUI notice self-improvement edits to loaded extensions,
|
||||
* skills, prompts, and themes without asking the user to run `/reload`.
|
||||
*
|
||||
* Consumer: computeInteractiveResourceFingerprint() during the TUI autoreload
|
||||
* polling loop.
|
||||
*/
|
||||
function collectInteractiveResourceFiles(resourcePath: string): string[] {
|
||||
let stat: fs.Stats;
|
||||
try {
|
||||
stat = fs.statSync(resourcePath);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (stat.isFile()) {
|
||||
return AUTO_RELOAD_RESOURCE_EXTENSIONS.has(path.extname(resourcePath))
|
||||
? [resourcePath]
|
||||
: [];
|
||||
}
|
||||
if (!stat.isDirectory()) return [];
|
||||
|
||||
const files: string[] = [];
|
||||
const stack = [resourcePath];
|
||||
while (stack.length > 0) {
|
||||
const current = stack.pop();
|
||||
if (!current) continue;
|
||||
|
||||
let entries: fs.Dirent[];
|
||||
try {
|
||||
entries = fs.readdirSync(current, { withFileTypes: true });
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const entry of entries) {
|
||||
const fullPath = path.join(current, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
if (!AUTO_RELOAD_IGNORED_DIRS.has(entry.name)) stack.push(fullPath);
|
||||
continue;
|
||||
}
|
||||
if (
|
||||
entry.isFile() &&
|
||||
AUTO_RELOAD_RESOURCE_EXTENSIONS.has(path.extname(entry.name))
|
||||
) {
|
||||
files.push(fullPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
return files;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hash the loaded resource file set using paths, mtimes, and sizes.
|
||||
*
|
||||
* Purpose: detect changed extension/skill/prompt/theme resources cheaply enough
|
||||
* to poll in an interactive TUI, while avoiding expensive full-content hashing.
|
||||
*
|
||||
* Consumer: InteractiveMode.startAutoReloadWatcher().
|
||||
*/
|
||||
function computeInteractiveResourceFingerprint(
|
||||
resourcePaths: Iterable<string>,
|
||||
): string {
|
||||
const files = [
|
||||
...new Set([...resourcePaths].flatMap(collectInteractiveResourceFiles)),
|
||||
].sort();
|
||||
const hash = crypto.createHash("sha256");
|
||||
for (const file of files) {
|
||||
try {
|
||||
const stat = fs.statSync(file);
|
||||
hash.update(file);
|
||||
hash.update("\0");
|
||||
hash.update(String(stat.mtimeMs));
|
||||
hash.update("\0");
|
||||
hash.update(String(stat.size));
|
||||
hash.update("\0");
|
||||
} catch {
|
||||
hash.update(file);
|
||||
hash.update("\0missing\0");
|
||||
}
|
||||
}
|
||||
return hash.digest("hex").slice(0, 16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for InteractiveMode initialization.
|
||||
*/
|
||||
|
|
@ -246,6 +351,10 @@ export class InteractiveMode {
|
|||
private isInitialized = false;
|
||||
private readonly processRestartFingerprint =
|
||||
computeInteractiveRuntimeFingerprint();
|
||||
private resourceReloadFingerprint: string | undefined;
|
||||
private autoReloadTimer: NodeJS.Timeout | undefined;
|
||||
private autoReloadInProgress = false;
|
||||
private autoReloadPendingReason: string | undefined;
|
||||
private onInputCallback?: (text: string) => void;
|
||||
private loadingAnimation: Loader | undefined = undefined;
|
||||
private readonly defaultWorkingMessage = "Working...";
|
||||
|
|
@ -665,6 +774,7 @@ export class InteractiveMode {
|
|||
this._branchChangeUnsub = this.footerDataProvider.onBranchChange(() => {
|
||||
this.ui.requestRender();
|
||||
});
|
||||
this.startAutoReloadWatcher();
|
||||
|
||||
// Initialize available provider count for footer display
|
||||
await this.updateAvailableProviderCount();
|
||||
|
|
@ -2638,7 +2748,11 @@ export class InteractiveMode {
|
|||
const component = new ToolExecutionComponent(
|
||||
content.name,
|
||||
content.arguments,
|
||||
{ showImages: this.settingsManager.getShowImages(), modelLabel },
|
||||
{
|
||||
showImages: this.settingsManager.getShowImages(),
|
||||
modelLabel,
|
||||
startedAt: message.timestamp,
|
||||
},
|
||||
this.getRegisteredToolDefinition(content.name),
|
||||
this.ui,
|
||||
);
|
||||
|
|
@ -2671,7 +2785,11 @@ export class InteractiveMode {
|
|||
const component = new ToolExecutionComponent(
|
||||
content.name,
|
||||
content.input ?? {},
|
||||
{ showImages: this.settingsManager.getShowImages(), modelLabel },
|
||||
{
|
||||
showImages: this.settingsManager.getShowImages(),
|
||||
modelLabel,
|
||||
startedAt: message.timestamp,
|
||||
},
|
||||
undefined,
|
||||
this.ui,
|
||||
);
|
||||
|
|
@ -3744,6 +3862,105 @@ export class InteractiveMode {
|
|||
// Command handlers
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Start polling loaded runtime resources for self-improvement changes.
|
||||
*
|
||||
* Purpose: make the TUI pick up SF's own code/resource fixes as soon as it
|
||||
* is idle, instead of requiring a human to type `/reload` after an agent
|
||||
* updates extensions, skills, prompts, themes, or restart-sensitive runtime
|
||||
* modules.
|
||||
*
|
||||
* Consumer: initialize() after extension and branch watchers are installed.
|
||||
*/
|
||||
private startAutoReloadWatcher(): void {
|
||||
if (process.env.SF_TUI_AUTORELOAD === "0") return;
|
||||
if (this.autoReloadTimer) return;
|
||||
|
||||
this.refreshAutoReloadResourceFingerprint();
|
||||
this.autoReloadTimer = setInterval(() => {
|
||||
void this.checkAutoReload();
|
||||
}, AUTO_RELOAD_INTERVAL_MS);
|
||||
this.autoReloadTimer.unref?.();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the TUI autoreload watcher.
|
||||
*
|
||||
* Purpose: avoid a background interval keeping the process alive or firing
|
||||
* after the UI has been stopped during shutdown/reload.
|
||||
*
|
||||
* Consumer: stop().
|
||||
*/
|
||||
private stopAutoReloadWatcher(): void {
|
||||
if (!this.autoReloadTimer) return;
|
||||
clearInterval(this.autoReloadTimer);
|
||||
this.autoReloadTimer = undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Snapshot the currently loaded extension/skill/prompt/theme resources.
|
||||
*
|
||||
* Purpose: establish the post-load baseline used to tell whether a future
|
||||
* self-improvement changed the runtime resources that `/reload` would refresh.
|
||||
*
|
||||
* Consumer: startAutoReloadWatcher(), handleReloadCommand().
|
||||
*/
|
||||
private refreshAutoReloadResourceFingerprint(): void {
|
||||
this.resourceReloadFingerprint = computeInteractiveResourceFingerprint(
|
||||
this.session.resourceLoader.getPathMetadata().keys(),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether runtime or resource files changed and reload when idle.
|
||||
*
|
||||
* Purpose: turn self-improvement writes into an automatic reload/restart as
|
||||
* soon as the TUI can safely do it, while avoiding interruption during an
|
||||
* active model stream or compaction.
|
||||
*
|
||||
* Consumer: startAutoReloadWatcher() interval callback.
|
||||
*/
|
||||
private async checkAutoReload(): Promise<void> {
|
||||
if (this.autoReloadInProgress) return;
|
||||
|
||||
const runtimeChanged =
|
||||
computeInteractiveRuntimeFingerprint() !== this.processRestartFingerprint;
|
||||
const resourceFingerprint = computeInteractiveResourceFingerprint(
|
||||
this.session.resourceLoader.getPathMetadata().keys(),
|
||||
);
|
||||
const resourcesChanged =
|
||||
this.resourceReloadFingerprint !== undefined &&
|
||||
resourceFingerprint !== this.resourceReloadFingerprint;
|
||||
|
||||
if (!runtimeChanged && !resourcesChanged && !this.autoReloadPendingReason) {
|
||||
return;
|
||||
}
|
||||
|
||||
const reason =
|
||||
this.autoReloadPendingReason ??
|
||||
(runtimeChanged
|
||||
? "runtime changed on disk"
|
||||
: "resources changed on disk");
|
||||
if (this.session.isStreaming || this.session.isCompacting) {
|
||||
this.autoReloadPendingReason = reason;
|
||||
return;
|
||||
}
|
||||
|
||||
this.autoReloadInProgress = true;
|
||||
this.autoReloadPendingReason = undefined;
|
||||
try {
|
||||
this.showStatus(`Auto-reload: ${reason}; reloading SF...`);
|
||||
this.ui.requestRender();
|
||||
await this.handleReloadCommand();
|
||||
this.refreshAutoReloadResourceFingerprint();
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
this.showWarning(`Auto-reload failed: ${message}`);
|
||||
} finally {
|
||||
this.autoReloadInProgress = false;
|
||||
}
|
||||
}
|
||||
|
||||
private async handleReloadCommand(): Promise<void> {
|
||||
if (this.session.isStreaming) {
|
||||
this.showWarning(
|
||||
|
|
@ -3791,6 +4008,7 @@ export class InteractiveMode {
|
|||
|
||||
try {
|
||||
await this.session.reload();
|
||||
this.refreshAutoReloadResourceFingerprint();
|
||||
setRegisteredThemes(this.session.resourceLoader.getThemes().themes);
|
||||
this.hideThinkingBlock = this.settingsManager.getHideThinkingBlock();
|
||||
const themeName = this.settingsManager.getTheme();
|
||||
|
|
@ -4094,6 +4312,7 @@ export class InteractiveMode {
|
|||
// Clean up branch change listener (Fix 1)
|
||||
this._branchChangeUnsub?.();
|
||||
this._branchChangeUnsub = undefined;
|
||||
this.stopAutoReloadWatcher();
|
||||
|
||||
// Clean up theme change listener and watcher (Fix 2)
|
||||
onThemeChange(() => {});
|
||||
|
|
|
|||
|
|
@ -273,6 +273,18 @@ export class TUI extends Container {
|
|||
private stopped = false;
|
||||
private _lastRenderedComponents: string[] | null = null;
|
||||
|
||||
// === Sticky bottom scrolling ===
|
||||
private isScrolledToBottom = true; // Track if user is scrolled to bottom
|
||||
|
||||
// === Autonomous mode info bar ===
|
||||
public autonomousStatus?: {
|
||||
currentSlice?: string;
|
||||
sliceStatus?: string;
|
||||
progress?: number;
|
||||
totalTasks?: number;
|
||||
completedTasks?: number;
|
||||
};
|
||||
|
||||
// Overlay stack for modal components rendered on top of base content
|
||||
private focusOrderCounter = 0;
|
||||
private overlayStack: {
|
||||
|
|
@ -547,6 +559,101 @@ export class TUI extends Container {
|
|||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if user is scrolled to the bottom of the content
|
||||
*/
|
||||
private isAtBottom(): boolean {
|
||||
const height = this.terminal.rows;
|
||||
const viewportTop = Math.max(0, this.maxLinesRendered - height);
|
||||
const viewportBottom = viewportTop + height;
|
||||
return viewportBottom >= this.previousLines.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scroll to bottom of content (sticky bottom)
|
||||
*/
|
||||
private scrollToBottom(): void {
|
||||
const height = this.terminal.rows;
|
||||
const contentHeight = this.previousLines.length;
|
||||
if (contentHeight <= height) return; // No scrolling needed if content fits in viewport
|
||||
|
||||
// For terminal scrolling, we can use cursor movement or scroll sequences
|
||||
// The simplest approach is to move the cursor to the bottom line
|
||||
const viewportTop = Math.max(0, contentHeight - height);
|
||||
const targetScreenRow = contentHeight - 1;
|
||||
const currentScreenRow = this.hardwareCursorRow - this.previousViewportTop;
|
||||
const lineDiff = targetScreenRow - currentScreenRow;
|
||||
|
||||
if (lineDiff > 0) {
|
||||
this.terminal.write(`\x1b[${lineDiff}B`); // Move cursor down
|
||||
} else if (lineDiff < 0) {
|
||||
this.terminal.write(`\x1b[${-lineDiff}A`); // Move cursor up
|
||||
}
|
||||
|
||||
this.previousViewportTop = viewportTop;
|
||||
this.isScrolledToBottom = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update autonomous status information
|
||||
*/
|
||||
updateAutonomousStatus(status: {
|
||||
currentSlice?: string;
|
||||
sliceStatus?: string;
|
||||
progress?: number;
|
||||
totalTasks?: number;
|
||||
completedTasks?: number;
|
||||
}): void {
|
||||
this.autonomousStatus = status;
|
||||
this.requestRender();
|
||||
}
|
||||
|
||||
/**
|
||||
* Render autonomous mode info bar
|
||||
*/
|
||||
private renderAutonomousStatusBar(width: number): string[] {
|
||||
if (!this.autonomousStatus) return [];
|
||||
|
||||
const { currentSlice, sliceStatus, progress, totalTasks, completedTasks } =
|
||||
this.autonomousStatus;
|
||||
const lines: string[] = [];
|
||||
|
||||
// Create status bar line
|
||||
let statusLine = "\x1b[90m│ AUTONOMOUS MODE ";
|
||||
|
||||
if (currentSlice) {
|
||||
statusLine += `\x1b[97mSlice: \x1b[96m${currentSlice} `;
|
||||
}
|
||||
|
||||
if (sliceStatus) {
|
||||
statusLine += `\x1b[97mStatus: \x1b[92m${sliceStatus} `;
|
||||
}
|
||||
|
||||
if (progress !== undefined) {
|
||||
const progressBar = this.createProgressBar(progress, width - 30);
|
||||
statusLine += `\x1b[97mProgress: \x1b[93m${progressBar} `;
|
||||
}
|
||||
|
||||
if (totalTasks !== undefined && completedTasks !== undefined) {
|
||||
statusLine += `\x1b[97mTasks: \x1b[95m${completedTasks}/${totalTasks} `;
|
||||
}
|
||||
|
||||
statusLine += "\x1b[90m│\x1b[0m";
|
||||
lines.push(statusLine);
|
||||
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a simple ASCII progress bar
|
||||
*/
|
||||
private createProgressBar(progress: number, width: number): string {
|
||||
const barWidth = Math.min(20, Math.max(5, width));
|
||||
const filled = Math.floor((progress / 100) * barWidth);
|
||||
const empty = barWidth - filled;
|
||||
return `[${"█".repeat(filled)}${"░".repeat(empty)}] ${progress}%`;
|
||||
}
|
||||
|
||||
private handleInput(data: string): void {
|
||||
if (this.inputListeners.size > 0) {
|
||||
let current = data;
|
||||
|
|
@ -579,6 +686,14 @@ export class TUI extends Container {
|
|||
return;
|
||||
}
|
||||
|
||||
// Detect scrolling keys (Page Up/Down, arrow keys) to break sticky bottom
|
||||
if (
|
||||
this.isScrolledToBottom &&
|
||||
(matchesKey(data, "pageUp") || matchesKey(data, "up"))
|
||||
) {
|
||||
this.isScrolledToBottom = false;
|
||||
}
|
||||
|
||||
// If focused component is an overlay, verify it's still visible
|
||||
// (visibility can change due to terminal resize or visible() callback)
|
||||
const focusedOverlay = this.overlayStack.find(
|
||||
|
|
@ -595,6 +710,17 @@ export class TUI extends Container {
|
|||
}
|
||||
}
|
||||
|
||||
// Enter key scrolling behavior: if not at bottom, scroll down instead of sending input
|
||||
if (data === "\r" || data === "\n") {
|
||||
// Enter key
|
||||
if (!this.isAtBottom()) {
|
||||
// Scroll down one page or to bottom
|
||||
this.scrollToBottom();
|
||||
return;
|
||||
}
|
||||
// If we're at bottom, let Enter pass through to focused component
|
||||
}
|
||||
|
||||
// Pass input to focused component (including Ctrl+C)
|
||||
// The focused component can decide how to handle Ctrl+C
|
||||
if (this.focusedComponent?.handleInput) {
|
||||
|
|
@ -674,11 +800,22 @@ export class TUI extends Container {
|
|||
// Render all components to get new lines
|
||||
let newLines = this.render(width);
|
||||
|
||||
// Add autonomous status bar at the top if in autonomous mode
|
||||
const statusBarLines = this.renderAutonomousStatusBar(width);
|
||||
if (statusBarLines.length > 0) {
|
||||
newLines = [...statusBarLines, ...newLines];
|
||||
}
|
||||
|
||||
// Check if content grew and we should scroll to bottom (sticky bottom behavior)
|
||||
const contentGrew = newLines.length > this.previousLines.length;
|
||||
const shouldScrollToBottom = contentGrew && this.isScrolledToBottom;
|
||||
|
||||
// Skip ALL post-processing if component output is unchanged.
|
||||
// Container.render() returns the same array reference when stable.
|
||||
if (
|
||||
newLines === this._lastRenderedComponents &&
|
||||
this.overlayStack.length === 0
|
||||
this.overlayStack.length === 0 &&
|
||||
!shouldScrollToBottom
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -997,6 +1134,11 @@ export class TUI extends Container {
|
|||
this.maxLinesRendered = Math.max(this.maxLinesRendered, newLines.length);
|
||||
this.previousViewportTop = Math.max(0, this.maxLinesRendered - height);
|
||||
|
||||
// Apply sticky bottom behavior if content grew and user was at bottom
|
||||
if (shouldScrollToBottom) {
|
||||
this.scrollToBottom();
|
||||
}
|
||||
|
||||
// Position hardware cursor for IME
|
||||
this.positionHardwareCursor(cursorPos, newLines.length);
|
||||
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ export function buildAutoBootstrapContext(basePath: string): string {
|
|||
"SF headless autonomous found no milestones. Use the repository files below as the seed context.",
|
||||
"Research SF working specs first, then every relevant markdown document and every source file path before creating the initial milestone plan.",
|
||||
"Use tool-based repository inspection for source contents; do not assume the seed excerpt is complete.",
|
||||
"Treat .sf/PROJECT.md, .sf/REQUIREMENTS.md, .sf/DECISIONS.md, .sf/KNOWLEDGE.md, and .sf/RUNTIME.md as SF's canonical working spec/state docs when present.",
|
||||
"Treat .sf/PROJECT.md, .sf/REQUIREMENTS.md, .sf/DECISIONS.md, .sf/KNOWLEDGE.md, and .sf/RUNTIME.md as review/export or recovery surfaces when present; `.sf/sf.db` remains the canonical structured runtime state.",
|
||||
"Treat any root-level SPEC.md, BASE_SPEC.md, PRODUCT_SPEC.md, docs/specs files, or other docs as repo evidence for humans. Project facts SF needs later into SF's .sf working model and DB-backed state; do not create a parallel base-spec system.",
|
||||
"For product-facing or workflow-facing work, research the product category and representative competitors before locking requirements or slices. Capture table stakes, differentiators, common failure modes, and what not to copy.",
|
||||
"Extract the project purpose, vision, architecture, constraints, current TODOs, risks, eval/gate ideas, and implementation backlog.",
|
||||
|
|
|
|||
|
|
@ -8,20 +8,20 @@
|
|||
* Consumer: Every module in src/ and packages/ that needs application logging.
|
||||
*/
|
||||
|
||||
import { mkdirSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { getRotatingFileSink } from "@logtape/file";
|
||||
import {
|
||||
configure,
|
||||
reset,
|
||||
getLogger as logtapeGetLogger,
|
||||
getConsoleSink,
|
||||
getJsonLinesFormatter,
|
||||
type LogRecord,
|
||||
getLogger as logtapeGetLogger,
|
||||
reset,
|
||||
type Sink,
|
||||
} from "@logtape/logtape";
|
||||
import { getPrettyFormatter } from "@logtape/pretty";
|
||||
import { getRotatingFileSink } from "@logtape/file";
|
||||
import { redactByPattern, redactByField } from "@logtape/redaction";
|
||||
import { mkdirSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { redactByField, redactByPattern } from "@logtape/redaction";
|
||||
|
||||
export interface LoggerOptions {
|
||||
/** Session identifier for per-session log directories. */
|
||||
|
|
@ -108,7 +108,9 @@ function buildRedactingSink(
|
|||
*
|
||||
* Consumer: src/cli.ts early in startup, and test suites.
|
||||
*/
|
||||
export async function configureLogger(options: LoggerOptions = {}): Promise<void> {
|
||||
export async function configureLogger(
|
||||
options: LoggerOptions = {},
|
||||
): Promise<void> {
|
||||
if (configured) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -204,12 +206,17 @@ export async function configureLogger(options: LoggerOptions = {}): Promise<void
|
|||
*
|
||||
* Consumer: Every migrated module calls `const log = getLogger("sf.core.env")`.
|
||||
*/
|
||||
export function getLogger(category: string): ReturnType<typeof logtapeGetLogger> {
|
||||
export function getLogger(
|
||||
category: string,
|
||||
): ReturnType<typeof logtapeGetLogger> {
|
||||
return logtapeGetLogger(category.split("."));
|
||||
}
|
||||
|
||||
function inferMode(): "dev" | "autonomous" {
|
||||
if (process.env.SF_AUTONOMOUS === "1" || process.env.NODE_ENV === "production") {
|
||||
if (
|
||||
process.env.SF_AUTONOMOUS === "1" ||
|
||||
process.env.NODE_ENV === "production"
|
||||
) {
|
||||
return "autonomous";
|
||||
}
|
||||
return "dev";
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
* - Redacts secrets from tool results before the LLM sees them
|
||||
* - Blocks dangerous bash commands (rm -rf, sudo, mkfs, etc.)
|
||||
* - Blocks writes to protected paths (.env, .git, .ssh, etc.)
|
||||
* - Registers SF slash commands: /safegit, /safegit-level, /safegit-status, /yolo
|
||||
*/
|
||||
import * as path from "node:path";
|
||||
|
||||
|
|
@ -222,7 +223,7 @@ const GIT_PATTERNS = [
|
|||
},
|
||||
// Medium risk
|
||||
{ pattern: /\bgit\s+push\b/i, action: "push", severity: "medium" },
|
||||
{ pattern: /\bgit\s+commit\b/i, action: "commit", severity: "medium" },
|
||||
// git commit removed - commits are now allowed without prompting
|
||||
{ pattern: /\bgit\s+rebase\b/i, action: "rebase", severity: "medium" },
|
||||
{ pattern: /\bgit\s+merge\b/i, action: "merge", severity: "medium" },
|
||||
{
|
||||
|
|
@ -414,6 +415,15 @@ async function checkGitCommand(
|
|||
}
|
||||
return undefined;
|
||||
}
|
||||
/**
|
||||
* Register SF slash commands for session-local safe-git controls.
|
||||
*
|
||||
* Purpose: expose Git guardrail controls through the SF command surface rather
|
||||
* than as shell binaries, so users can type `/safegit` in chat/TUI and update
|
||||
* the current session's Git approval policy.
|
||||
*
|
||||
* Consumer: guardrails(pi) during extension registration.
|
||||
*/
|
||||
function registerSafeGitCommands(
|
||||
pi,
|
||||
sessionEnabledOverride,
|
||||
|
|
@ -421,7 +431,8 @@ function registerSafeGitCommands(
|
|||
yoloPreviousPromptLevel,
|
||||
) {
|
||||
pi.registerCommand("safegit", {
|
||||
description: "Toggle safe-git protection on/off for this session",
|
||||
description:
|
||||
"Slash command /safegit: toggle safe-git protection for this session",
|
||||
handler: async (_, ctx) => {
|
||||
const { enabled } = getSafeGitConfig(
|
||||
ctx,
|
||||
|
|
@ -439,7 +450,8 @@ function registerSafeGitCommands(
|
|||
},
|
||||
});
|
||||
pi.registerCommand("safegit-level", {
|
||||
description: "Set prompt level: high, medium, or none",
|
||||
description:
|
||||
"Slash command /safegit-level: set prompt level to high, medium, or none",
|
||||
handler: async (args, ctx) => {
|
||||
const arg = typeof args === "string" ? args.trim().toLowerCase() : "";
|
||||
if (arg === "high" || arg === "medium" || arg === "none") {
|
||||
|
|
@ -479,7 +491,8 @@ function registerSafeGitCommands(
|
|||
},
|
||||
});
|
||||
pi.registerCommand("yolo", {
|
||||
description: "Toggle session-only safe-git prompt bypass",
|
||||
description:
|
||||
"Slash command /yolo: toggle session-only safe-git prompt bypass",
|
||||
handler: async (_, ctx) => {
|
||||
const { promptLevel } = getSafeGitConfig(
|
||||
ctx,
|
||||
|
|
@ -506,7 +519,8 @@ function registerSafeGitCommands(
|
|||
},
|
||||
});
|
||||
pi.registerCommand("safegit-status", {
|
||||
description: "Show safe-git status and settings",
|
||||
description:
|
||||
"Slash command /safegit-status: show safe-git status and settings",
|
||||
handler: async (_, ctx) => {
|
||||
const settings = ctx.settingsManager?.getSettings() ?? {};
|
||||
const globalConfig = {
|
||||
|
|
|
|||
|
|
@ -87,11 +87,13 @@ import { handleProductAudit } from "../tools/product-audit-tool.js";
|
|||
import { parseUnitId } from "../unit-id.js";
|
||||
import { resolveUokFlags } from "../uok/flags.js";
|
||||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||||
import { emitModelAutoResolvedEvent } from "../uok/model-route-evidence.js";
|
||||
import {
|
||||
ensurePlanV2Graph as ensurePlanningFlowGraph,
|
||||
isEmptyPlanV2GraphResult,
|
||||
isMissingFinalizedContextResult,
|
||||
} from "../uok/plan-v2.js";
|
||||
import { buildUokProgressEvent } from "../uok/progress-event.js";
|
||||
import {
|
||||
clearUnitRuntimeRecord,
|
||||
writeUnitRuntimeRecord,
|
||||
|
|
@ -1875,6 +1877,25 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
eventType: "unit-start",
|
||||
data: { unitType, unitId },
|
||||
});
|
||||
{
|
||||
const progressEvent = buildUokProgressEvent({
|
||||
eventType: "unit_selected",
|
||||
unitType,
|
||||
unitId,
|
||||
role: "worker",
|
||||
sessionId: ctx.sessionManager.getSessionId(),
|
||||
traceId: ic.flowId,
|
||||
data: { legacyEventType: "unit-start" },
|
||||
});
|
||||
deps.emitJournalEvent({
|
||||
ts: progressEvent.ts,
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: progressEvent.eventType,
|
||||
data: progressEvent,
|
||||
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
|
||||
});
|
||||
}
|
||||
ctx.ui.notify(`[unit] ${unitType} ${unitId} starting`, "info");
|
||||
deps.captureAvailableSkills();
|
||||
writeUnitRuntimeRecord(
|
||||
|
|
@ -1890,6 +1911,11 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
progressCount: 0,
|
||||
lastProgressKind: "dispatch",
|
||||
recoveryAttempts: 0, // Reset so re-dispatched units get full recovery budget (#2322)
|
||||
lineageEvent: {
|
||||
status: "started",
|
||||
workerSessionId: ctx.sessionManager.getSessionId(),
|
||||
note: "unit dispatched",
|
||||
},
|
||||
},
|
||||
);
|
||||
// Status bar (widget + preconditions deferred until after model selection — see #2899)
|
||||
|
|
@ -2125,6 +2151,48 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
s.currentDispatchedModelId = s.currentUnitModel
|
||||
? `${s.currentUnitModel.provider ?? ""}/${s.currentUnitModel.id ?? ""}`
|
||||
: null;
|
||||
emitModelAutoResolvedEvent(s.basePath, {
|
||||
traceId: `model:${ctx.sessionManager.getSessionId()}:${unitType}:${unitId}`,
|
||||
unitType,
|
||||
unitId,
|
||||
resolvedModel: s.currentUnitModel ?? ctx.model ?? null,
|
||||
authMode:
|
||||
(s.currentUnitModel?.provider ?? ctx.model?.provider)
|
||||
? ctx.modelRegistry.getProviderAuthMode(
|
||||
s.currentUnitModel?.provider ?? ctx.model.provider,
|
||||
)
|
||||
: undefined,
|
||||
routingReason: hookModelOverride
|
||||
? `hook override: ${hookModelOverride}`
|
||||
: "auto selector",
|
||||
routing: s.currentUnitRouting,
|
||||
hookOverrideApplied: Boolean(hookModelOverride),
|
||||
tokenUsage: collectSessionTokenUsage?.(ctx),
|
||||
});
|
||||
{
|
||||
const progressEvent = buildUokProgressEvent({
|
||||
eventType: "model_auto_resolved",
|
||||
unitType,
|
||||
unitId,
|
||||
role: "worker",
|
||||
sessionId: ctx.sessionManager.getSessionId(),
|
||||
traceId: ic.flowId,
|
||||
data: {
|
||||
resolvedProvider: s.currentUnitModel?.provider ?? ctx.model?.provider,
|
||||
resolvedModel: s.currentUnitModel?.id ?? ctx.model?.id,
|
||||
routing: s.currentUnitRouting,
|
||||
hookOverrideApplied: Boolean(hookModelOverride),
|
||||
},
|
||||
});
|
||||
deps.emitJournalEvent({
|
||||
ts: progressEvent.ts,
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: progressEvent.eventType,
|
||||
data: progressEvent,
|
||||
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
|
||||
});
|
||||
}
|
||||
const compatibilityError = getWorkflowTransportSupportError(
|
||||
s.currentUnitModel?.provider ?? ctx.model?.provider,
|
||||
getRequiredWorkflowToolsForAutoUnit(unitType),
|
||||
|
|
@ -2724,6 +2792,75 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
},
|
||||
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
|
||||
});
|
||||
if (
|
||||
currentUnitResult.status === "completed" ||
|
||||
currentUnitResult.status === "blocked"
|
||||
) {
|
||||
const progressEvent = buildUokProgressEvent({
|
||||
eventType:
|
||||
currentUnitResult.status === "completed"
|
||||
? "unit_completed"
|
||||
: "unit_blocked",
|
||||
unitType,
|
||||
unitId,
|
||||
role: "worker",
|
||||
sessionId: ctx.sessionManager.getSessionId(),
|
||||
traceId: ic.flowId,
|
||||
data: {
|
||||
status: currentUnitResult.status,
|
||||
artifactVerified,
|
||||
legacyEventType: "unit-end",
|
||||
...(unitEndEntry
|
||||
? {
|
||||
cost_usd: unitEndEntry.cost,
|
||||
tokens: unitEndEntry.tokens.total,
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
});
|
||||
deps.emitJournalEvent({
|
||||
ts: progressEvent.ts,
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: progressEvent.eventType,
|
||||
data: progressEvent,
|
||||
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
|
||||
});
|
||||
}
|
||||
}
|
||||
{
|
||||
const runtimeStatus =
|
||||
currentUnitResult.status === "completed"
|
||||
? artifactVerified
|
||||
? "completed"
|
||||
: "blocked"
|
||||
: currentUnitResult.status === "error"
|
||||
? "failed"
|
||||
: currentUnitResult.status;
|
||||
const lineageStatus =
|
||||
runtimeStatus === "completed"
|
||||
? "completed"
|
||||
: runtimeStatus === "blocked"
|
||||
? "blocked"
|
||||
: runtimeStatus === "cancelled"
|
||||
? "cancelled"
|
||||
: "failed";
|
||||
writeUnitRuntimeRecord(
|
||||
s.basePath,
|
||||
unitType,
|
||||
unitId,
|
||||
s.currentUnit?.startedAt ?? Date.now(),
|
||||
{
|
||||
status: runtimeStatus,
|
||||
lastProgressAt: Date.now(),
|
||||
lastProgressKind: "unit-end",
|
||||
lineageEvent: {
|
||||
status: lineageStatus,
|
||||
workerSessionId: ctx.sessionManager.getSessionId(),
|
||||
note: `unit ended with ${currentUnitResult.status}`,
|
||||
},
|
||||
},
|
||||
);
|
||||
}
|
||||
{
|
||||
const verdict =
|
||||
|
|
|
|||
|
|
@ -663,7 +663,7 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
|
|||
"",
|
||||
"**Low-confidence reconstruction guidance**:",
|
||||
"- Use outcome='decide' when evidence is sparse or ambiguous (confidence < 0.98)",
|
||||
"- Use outcome='decide' when you cannot verify what work was actually completed",
|
||||
"- Use outcome='decide' when you cannot verify what work was actually completed",
|
||||
"- Use outcome='decide' when there are multiple possible interpretations of progress",
|
||||
"- This ensures autonomous mode pauses for human acceptance rather than guessing incorrectly",
|
||||
);
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ import {
|
|||
recordToolResult as safetyRecordToolResult,
|
||||
saveEvidenceToDisk,
|
||||
} from "../safety/evidence-collector.js";
|
||||
import { initSessionRecorder } from "../session-recorder.js";
|
||||
import { deriveState } from "../state.js";
|
||||
import { countGoogleGeminiCliTokens } from "../token-counter.js";
|
||||
import { parseUnitId } from "../unit-id.js";
|
||||
|
|
@ -163,6 +164,10 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
if (sid) {
|
||||
process.stderr.write(`[forge] session ${sid.slice(0, 8)} · ${sfile}\n`);
|
||||
}
|
||||
// Establish the session row so all subsequent turns have a parent.
|
||||
// Git context (repo, branch) is patched in before_agent_start once the
|
||||
// DB is open and the cwd is confirmed.
|
||||
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
|
||||
} catch {
|
||||
/* non-fatal */
|
||||
}
|
||||
|
|
@ -176,6 +181,24 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
await syncServiceTierStatus(ctx);
|
||||
await initializeLearningRuntime();
|
||||
await runSessionStartupDoctorFix(ctx);
|
||||
// Initialize metrics-central with database adapter
|
||||
try {
|
||||
const { initMetricsCentral } = await import("../metrics-central.js");
|
||||
const { getDatabase } = await import("../sf-db.js");
|
||||
const dbAdapter = getDatabase();
|
||||
const sessionId = ctx.sessionManager?.getSessionId?.() || "";
|
||||
initMetricsCentral(process.cwd(), {
|
||||
sessionId,
|
||||
dbAdapter,
|
||||
});
|
||||
} catch (err) {
|
||||
// Non-fatal: metrics should not block session start
|
||||
const { logWarning } = await import("../workflow-logger.js");
|
||||
logWarning(
|
||||
"session-start",
|
||||
`Failed to initialize metrics-central: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
}
|
||||
// Apply show_token_cost preference (#1515)
|
||||
try {
|
||||
const { loadEffectiveSFPreferences } = await import("../preferences.js");
|
||||
|
|
@ -373,6 +396,15 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
await syncServiceTierStatus(ctx);
|
||||
await initializeLearningRuntime();
|
||||
loadToolApiKeys();
|
||||
// Re-establish session recorder for the new session so turn recording
|
||||
// continues under the correct session_id without contaminating the
|
||||
// previous session's rows.
|
||||
try {
|
||||
const sid = ctx.sessionManager?.getSessionId?.() ?? "";
|
||||
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
|
||||
} catch {
|
||||
/* non-fatal */
|
||||
}
|
||||
});
|
||||
pi.on("before_agent_start", async (event, ctx) => {
|
||||
// Refresh the ecosystem snapshot BEFORE running ecosystem handlers so they
|
||||
|
|
@ -383,9 +415,45 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
const basePath = process.cwd();
|
||||
const state = await deriveState(basePath);
|
||||
updateSnapshot(state);
|
||||
// Patch git context on the first turn now that the DB is confirmed open.
|
||||
// Best-effort: git may be absent (e.g. tmp dirs), so we swallow errors.
|
||||
try {
|
||||
const { execFileSync } = await import("node:child_process");
|
||||
const branch = execFileSync(
|
||||
"git",
|
||||
["rev-parse", "--abbrev-ref", "HEAD"],
|
||||
{
|
||||
cwd: basePath,
|
||||
encoding: "utf-8",
|
||||
stdio: ["ignore", "pipe", "ignore"],
|
||||
timeout: 3_000,
|
||||
},
|
||||
).trim();
|
||||
const remoteUrl = execFileSync(
|
||||
"git",
|
||||
["config", "--get", "remote.origin.url"],
|
||||
{
|
||||
cwd: basePath,
|
||||
encoding: "utf-8",
|
||||
stdio: ["ignore", "pipe", "ignore"],
|
||||
timeout: 3_000,
|
||||
},
|
||||
).trim();
|
||||
patchSessionGitContext(remoteUrl || null, branch || null);
|
||||
} catch {
|
||||
/* non-fatal: git absent or not a repo */
|
||||
}
|
||||
} catch {
|
||||
updateSnapshot(null);
|
||||
}
|
||||
// Record user message as the start of a new turn. Done after ensureDbOpen
|
||||
// so the turns row lands in the DB immediately; agent_end will patch the
|
||||
// assistant_response onto it once the model finishes.
|
||||
try {
|
||||
recordTurnStart(typeof event.prompt === "string" ? event.prompt : null);
|
||||
} catch {
|
||||
/* non-fatal: turn recording must never block the agent */
|
||||
}
|
||||
// Await ecosystem loading, then dispatch any registered handlers.
|
||||
await getEcosystemReadyPromise();
|
||||
for (const handler of ecosystemHandlers) {
|
||||
|
|
@ -401,6 +469,19 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
resetToolCallLoopGuard();
|
||||
resetAskUserQuestionsCache();
|
||||
await handleAgentEnd(pi, event, ctx);
|
||||
// Complete the pending turn row with the assistant's text response.
|
||||
// event.messages is an array; the last entry is the model's reply.
|
||||
// Its .content is an array of content blocks — extract the first text block.
|
||||
try {
|
||||
const msgs = Array.isArray(event.messages) ? event.messages : [];
|
||||
const lastMsg = msgs[msgs.length - 1];
|
||||
const textBlock = Array.isArray(lastMsg?.content)
|
||||
? lastMsg.content.find((b) => b.type === "text" && b.text)
|
||||
: null;
|
||||
recordTurnEnd(textBlock?.text ?? null);
|
||||
} catch {
|
||||
/* non-fatal: turn recording must never block agent teardown */
|
||||
}
|
||||
// Best-effort embedding backfill: when SF_LLM_GATEWAY_KEY is set and the
|
||||
// gateway has an embed worker online, embed any memories that don't yet
|
||||
// have a vector. Bounded per invocation; logs once-per-minute when the
|
||||
|
|
@ -538,7 +619,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
|
||||
// Return custom compaction summary that preserves work state
|
||||
// instead of cancelling compaction
|
||||
return {
|
||||
const result = {
|
||||
compaction: {
|
||||
summary:
|
||||
workState.length > 0
|
||||
|
|
@ -553,9 +634,31 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
},
|
||||
},
|
||||
};
|
||||
// Persist compaction summary as the session's most recent work description
|
||||
// so memory-pipeline ingestion has a compact semantic handle for retrieval.
|
||||
try {
|
||||
updateSessionSummary(result.compaction.summary);
|
||||
} catch {
|
||||
/* non-fatal */
|
||||
}
|
||||
return result;
|
||||
});
|
||||
pi.on("session_shutdown", async (_event, ctx) => {
|
||||
// Flush any in-flight turn (e.g. interrupted agent) and clear session state
|
||||
// so the recorder doesn't carry stale IDs into a subsequent process reuse.
|
||||
try {
|
||||
resetSessionRecorder();
|
||||
} catch {
|
||||
/* non-fatal */
|
||||
}
|
||||
resetLearningRuntime();
|
||||
// Stop metrics-central on session shutdown
|
||||
try {
|
||||
const { stopMetricsCentral } = await import("../metrics-central.js");
|
||||
stopMetricsCentral();
|
||||
} catch {
|
||||
// Non-fatal: cleanup should not block shutdown
|
||||
}
|
||||
if (isParallelActive()) {
|
||||
try {
|
||||
await shutdownParallel(process.cwd());
|
||||
|
|
@ -581,6 +684,14 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
if (loopCheck.block) {
|
||||
return { block: true, reason: loopCheck.reason };
|
||||
}
|
||||
// ── Session file-touch recording ──────────────────────────────────────
|
||||
// Best-effort: path may be absent for non-file tools; recordFileTouch
|
||||
// no-ops on non-write tools and when no session is active.
|
||||
try {
|
||||
recordFileTouch(event.toolName, event.input?.path ?? null);
|
||||
} catch {
|
||||
/* non-fatal */
|
||||
}
|
||||
// ── Research unit terminal transition enforcement ─────────────────────
|
||||
// After a research unit (research-slice/research-milestone) successfully
|
||||
// saves its RESEARCH artifact via sf_summary_save, the tool returns
|
||||
|
|
@ -961,6 +1072,22 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
});
|
||||
pi.on("tool_execution_end", async (event) => {
|
||||
markToolEnd(event.toolCallId);
|
||||
// Record tool execution performance metrics
|
||||
try {
|
||||
const { recordToolExecution } = await import("../metrics-central.js");
|
||||
recordToolExecution(
|
||||
event.toolName,
|
||||
event.durationMs,
|
||||
event.isError,
|
||||
event.isError
|
||||
? typeof event.result === "string"
|
||||
? event.result
|
||||
: "tool_error"
|
||||
: undefined,
|
||||
);
|
||||
} catch {
|
||||
// Non-fatal: metrics should not break tool execution
|
||||
}
|
||||
// #2883/#4974: Capture deterministic invocation/policy errors so
|
||||
// postUnitPreVerification can break the retry loop instead of re-dispatching.
|
||||
// Covers sf_ tool JSON errors AND write-gate blocks on write/edit/bash tools.
|
||||
|
|
@ -1040,6 +1167,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
return { messages };
|
||||
});
|
||||
pi.on("before_provider_request", async (event, ctx) => {
|
||||
const modelId = event.model?.id;
|
||||
const payload = event.payload;
|
||||
if (!payload || typeof payload !== "object") return;
|
||||
applyCompletionNudgeTemperature(payload);
|
||||
|
|
@ -1101,7 +1229,6 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
}
|
||||
}
|
||||
// ── Service Tier ────────────────────────────────────────────────────
|
||||
const modelId = event.model?.id;
|
||||
if (!modelId) {
|
||||
ctx.ui.setStatus("sf-gemini-tokens", undefined);
|
||||
return payload;
|
||||
|
|
|
|||
|
|
@ -474,10 +474,10 @@ function readMemoryDbStatus(adapter) {
|
|||
async function probeEmbedding(gatewayConfig, createGatewayEmbedFn) {
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
const embedFn = createGatewayEmbedFn({
|
||||
...gatewayConfig,
|
||||
timeoutMs: 10_000,
|
||||
});
|
||||
const embedFn = createGatewayEmbedFn(
|
||||
{ ...gatewayConfig, timeoutMs: 10_000 },
|
||||
{ instruction: gatewayConfig.queryInstruction },
|
||||
);
|
||||
const vectors = await embedFn(["sf memory status embedding probe"]);
|
||||
const dim = vectors[0]?.length ?? 0;
|
||||
if (dim <= 0) {
|
||||
|
|
|
|||
|
|
@ -356,15 +356,35 @@ async function listItems(args, ctx) {
|
|||
}
|
||||
|
||||
async function markDone(args, ctx) {
|
||||
const idPrefix = _joinPlain(_splitArgs(args));
|
||||
const parts = _splitArgs(args);
|
||||
let idPrefix = "";
|
||||
let scope = "project";
|
||||
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
const p = parts[i];
|
||||
if (p === "--scope" || p === "-s") {
|
||||
scope = parts[++i];
|
||||
continue;
|
||||
}
|
||||
if (!idPrefix) {
|
||||
idPrefix = p;
|
||||
}
|
||||
}
|
||||
|
||||
if (!idPrefix) {
|
||||
ctx.ui.notify("Usage: /schedule done \u003cid\u003e", "warning");
|
||||
ctx.ui.notify("Usage: /schedule done [--scope <scope>] <id>", "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
if (scope !== "project" && scope !== "global") {
|
||||
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
const store = createScheduleStore(_basePath());
|
||||
const { entry } = _findEntry(store, "project", idPrefix);
|
||||
const { entry } = _findEntry(store, scope, idPrefix);
|
||||
if (!entry) {
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
|
||||
return;
|
||||
}
|
||||
const updated = {
|
||||
|
|
@ -372,20 +392,40 @@ async function markDone(args, ctx) {
|
|||
status: "done",
|
||||
created_at: new Date().toISOString(),
|
||||
};
|
||||
store.appendEntry("project", updated);
|
||||
store.appendEntry(scope, updated);
|
||||
ctx.ui.notify(`Marked done: ${entry.id}`, "success");
|
||||
}
|
||||
|
||||
async function markCancel(args, ctx) {
|
||||
const idPrefix = _joinPlain(_splitArgs(args));
|
||||
const parts = _splitArgs(args);
|
||||
let idPrefix = "";
|
||||
let scope = "project";
|
||||
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
const p = parts[i];
|
||||
if (p === "--scope" || p === "-s") {
|
||||
scope = parts[++i];
|
||||
continue;
|
||||
}
|
||||
if (!idPrefix) {
|
||||
idPrefix = p;
|
||||
}
|
||||
}
|
||||
|
||||
if (!idPrefix) {
|
||||
ctx.ui.notify("Usage: /schedule cancel \u003cid\u003e", "warning");
|
||||
ctx.ui.notify("Usage: /schedule cancel [--scope <scope>] <id>", "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
if (scope !== "project" && scope !== "global") {
|
||||
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
const store = createScheduleStore(_basePath());
|
||||
const { entry } = _findEntry(store, "project", idPrefix);
|
||||
const { entry } = _findEntry(store, scope, idPrefix);
|
||||
if (!entry) {
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
|
||||
return;
|
||||
}
|
||||
const updated = {
|
||||
|
|
@ -393,7 +433,7 @@ async function markCancel(args, ctx) {
|
|||
status: "cancelled",
|
||||
created_at: new Date().toISOString(),
|
||||
};
|
||||
store.appendEntry("project", updated);
|
||||
store.appendEntry(scope, updated);
|
||||
ctx.ui.notify(`Cancelled: ${entry.id}`, "success");
|
||||
}
|
||||
|
||||
|
|
@ -401,10 +441,13 @@ async function snoozeItem(args, ctx) {
|
|||
const parts = _splitArgs(args);
|
||||
let idPrefix = "";
|
||||
let by = "";
|
||||
let scope = "project";
|
||||
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
if (parts[i] === "--by" || parts[i] === "-b") {
|
||||
by = parts[++i];
|
||||
} else if (parts[i] === "--scope" || parts[i] === "-s") {
|
||||
scope = parts[++i];
|
||||
} else if (!idPrefix) {
|
||||
idPrefix = parts[i];
|
||||
}
|
||||
|
|
@ -412,16 +455,21 @@ async function snoozeItem(args, ctx) {
|
|||
|
||||
if (!idPrefix || !by) {
|
||||
ctx.ui.notify(
|
||||
"Usage: /schedule snooze \u003cid\u003e --by \u003cduration\u003e",
|
||||
"Usage: /schedule snooze [--scope <scope>] <id> --by <duration>",
|
||||
"warning",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (scope !== "project" && scope !== "global") {
|
||||
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
const store = createScheduleStore(_basePath());
|
||||
const { entry } = _findEntry(store, "project", idPrefix);
|
||||
const { entry } = _findEntry(store, scope, idPrefix);
|
||||
if (!entry) {
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -444,29 +492,43 @@ async function snoozeItem(args, ctx) {
|
|||
created_at: now,
|
||||
snoozed_at: now,
|
||||
};
|
||||
store.appendEntry("project", updated);
|
||||
store.appendEntry(scope, updated);
|
||||
ctx.ui.notify(`Snoozed: ${entry.id}\nNew due: ${newDue}`, "success");
|
||||
}
|
||||
|
||||
async function runItem(args, ctx) {
|
||||
const parts = _splitArgs(args);
|
||||
let idPrefix = "";
|
||||
let scope = "project";
|
||||
let dryRun = false;
|
||||
|
||||
for (const part of parts) {
|
||||
if (part === "--dry-run" || part === "--dry") {
|
||||
dryRun = true;
|
||||
continue;
|
||||
} else if (part === "--scope" || part === "-s") {
|
||||
scope = parts[parts.indexOf(part) + 1];
|
||||
} else if (!idPrefix) {
|
||||
idPrefix = part;
|
||||
}
|
||||
if (!idPrefix) idPrefix = part;
|
||||
}
|
||||
|
||||
if (!idPrefix) {
|
||||
ctx.ui.notify("Usage: /schedule run [--dry-run] \u003cid\u003e", "warning");
|
||||
ctx.ui.notify(
|
||||
"Usage: /schedule run [--scope <scope>] [--dry-run] <id>",
|
||||
"warning",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (scope !== "project" && scope !== "global") {
|
||||
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
const store = createScheduleStore(_basePath());
|
||||
const { entry } = _findEntry(store, "project", idPrefix);
|
||||
const { entry } = _findEntry(store, scope, idPrefix);
|
||||
if (!entry) {
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
|
||||
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -490,7 +552,8 @@ async function runItem(args, ctx) {
|
|||
id: entry.id,
|
||||
kind: entry.kind,
|
||||
status: entry.status,
|
||||
cwd: _basePath(),
|
||||
scope: scope,
|
||||
cwd: scope === "project" ? _basePath() : undefined,
|
||||
command,
|
||||
autonomous_dispatch: entry.autonomous_dispatch === true,
|
||||
would_execute: typeof command === "string" && command.length > 0,
|
||||
|
|
@ -502,6 +565,18 @@ async function runItem(args, ctx) {
|
|||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Global scope commands cannot execute (no repo context)
|
||||
if (scope === "global") {
|
||||
ctx.ui.notify(
|
||||
`Cannot execute global scope command: ${entry.id}\n` +
|
||||
`Global commands can only be run in project context.\n` +
|
||||
`Use project scope for executable commands.`,
|
||||
"warning",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const result = executeProjectScheduleCommand(_basePath(), entry);
|
||||
if (!result.ok) {
|
||||
ctx.ui.notify(`Command failed: ${result.reason}`, "error");
|
||||
|
|
@ -566,10 +641,10 @@ export async function handleSchedule(args, ctx) {
|
|||
"Usage: /schedule add|list|done|cancel|snooze|run\n" +
|
||||
" add --in \u003cduration\u003e [--kind \u003ckind\u003e] [--scope \u003cscope\u003e] [--autonomous-dispatch] \u003ctitle-or-command\u003e\n" +
|
||||
" list [--due] [--all] [--json] [--scope \u003cscope\u003e]\n" +
|
||||
" done \u003cid\u003e\n" +
|
||||
" cancel \u003cid\u003e\n" +
|
||||
" snooze \u003cid\u003e --by \u003cduration\u003e\n" +
|
||||
" run [--dry-run] \u003cid\u003e",
|
||||
" done [--scope \u003cscope\u003e] \u003cid\u003e\n" +
|
||||
" cancel [--scope \u003cscope\u003e] \u003cid\u003e\n" +
|
||||
" snooze [--scope \u003cscope\u003e] \u003cid\u003e --by \u003cduration\u003e\n" +
|
||||
" run [--scope \u003cscope\u003e] [--dry-run] \u003cid\u003e",
|
||||
"info",
|
||||
);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -726,19 +726,27 @@ export async function handleCoreCommand(trimmed, ctx, pi) {
|
|||
return true;
|
||||
}
|
||||
// Normal list mode
|
||||
const { loadSkills, getPermittedSkills, getModelInvocableSkills } =
|
||||
await import("../../skills/loader.js");
|
||||
const skills = loadSkills(projectRoot());
|
||||
const {
|
||||
loadSkills,
|
||||
getPermittedSkills,
|
||||
getModelInvocableSkills,
|
||||
getUserInvocableSkills,
|
||||
} = await import("../../skills/loader.js");
|
||||
const skills = loadSkills(projectRoot(), { includeBundled: true });
|
||||
const visibleSkills = getUserInvocableSkills(skills);
|
||||
const mode = getAutoSession().getMode();
|
||||
const permitted = getPermittedSkills(skills, mode.permissionProfile);
|
||||
const modelInvocable = getModelInvocableSkills(skills, mode.workMode);
|
||||
const permitted = getPermittedSkills(visibleSkills, mode.permissionProfile);
|
||||
const modelInvocable = getModelInvocableSkills(
|
||||
visibleSkills,
|
||||
mode.workMode,
|
||||
);
|
||||
|
||||
const lines = ["SF Skills\n"];
|
||||
lines.push(
|
||||
`Found ${skills.length} skill(s) · ${permitted.length} permitted · ${modelInvocable.length} model-invocable\n`,
|
||||
`Found ${visibleSkills.length} user-invocable skill(s) · ${permitted.length} permitted · ${modelInvocable.length} model-invocable\n`,
|
||||
);
|
||||
|
||||
for (const skill of skills) {
|
||||
for (const skill of visibleSkills) {
|
||||
const icon = skill.valid ? "✓" : "✗";
|
||||
const user = skill.userInvocable ? "U" : " ";
|
||||
const model = skill.modelInvocable ? "M" : " ";
|
||||
|
|
|
|||
|
|
@ -26,12 +26,56 @@ function logRerankUnavailable(msg) {
|
|||
lastRerankUnavailableLogAt = now;
|
||||
logWarning("memory-embeddings", msg);
|
||||
}
|
||||
// Circuit breaker for the embed path. When the remote gateway is unreachable
|
||||
// (network timeout, cold-start stall), each call would otherwise wait the full
|
||||
// DEFAULT_TIMEOUT_MS (30 s) before failing. After EMBED_CIRCUIT_THRESHOLD
|
||||
// consecutive failures the circuit opens for EMBED_CIRCUIT_OPEN_MS and returns
|
||||
// [] immediately — callers fall through to keyword-only ranking with no stall.
|
||||
// The circuit half-opens automatically after the cooldown expires.
|
||||
const EMBED_CIRCUIT_THRESHOLD = 3;
|
||||
const EMBED_CIRCUIT_OPEN_MS = 60_000;
|
||||
const embedCircuit = { failures: 0, openUntil: 0, lastLogAt: 0 };
|
||||
function embedCircuitIsOpen() {
|
||||
return embedCircuit.openUntil > Date.now();
|
||||
}
|
||||
function onEmbedSuccess() {
|
||||
embedCircuit.failures = 0;
|
||||
embedCircuit.openUntil = 0;
|
||||
}
|
||||
function onEmbedFailure() {
|
||||
embedCircuit.failures += 1;
|
||||
if (embedCircuit.failures >= EMBED_CIRCUIT_THRESHOLD) {
|
||||
embedCircuit.openUntil = Date.now() + EMBED_CIRCUIT_OPEN_MS;
|
||||
const now = Date.now();
|
||||
if (now - embedCircuit.lastLogAt >= EMBED_CIRCUIT_OPEN_MS) {
|
||||
embedCircuit.lastLogAt = now;
|
||||
logWarning(
|
||||
"memory-embeddings",
|
||||
`llm-gateway /embeddings circuit open after ${EMBED_CIRCUIT_THRESHOLD} failures; ` +
|
||||
`skipping embed for ${EMBED_CIRCUIT_OPEN_MS / 1000}s — memory search falls back to keyword ranking`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
const ENV_KEY = "SF_LLM_GATEWAY_KEY";
|
||||
const ENV_URL = "SF_LLM_GATEWAY_URL";
|
||||
const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";
|
||||
const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL";
|
||||
const ENV_EMBED_QUERY_INSTRUCTION = "SF_LLM_GATEWAY_EMBED_QUERY_INSTRUCTION";
|
||||
const DEFAULT_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
|
||||
const DEFAULT_RERANK_MODEL = "Qwen/Qwen3-Reranker-0.6B";
|
||||
// Qwen3-Embedding uses asymmetric retrieval: queries are prefixed with a task
|
||||
// instruction so the model projects them into the "query" region of the embedding
|
||||
// space, while document texts are sent as-is (no instruction) so they land in
|
||||
// the "passage" region. Mixing these correctly is critical for retrieval quality.
|
||||
//
|
||||
// Format expected by the model: "Instruct: <task>\nQuery: " followed by the
|
||||
// query text (the gateway appends the text to the instruction). Documents omit
|
||||
// the instruction entirely.
|
||||
//
|
||||
// References: Qwen3-Embedding model card (HuggingFace) §Asymmetric Retrieval.
|
||||
const DEFAULT_QUERY_INSTRUCTION =
|
||||
"Instruct: Retrieve relevant software engineering memories, facts, and project decisions for the given query\nQuery: ";
|
||||
const KEY_ALIASES = [
|
||||
ENV_KEY,
|
||||
"LLM_GATEWAY_API_KEY",
|
||||
|
|
@ -50,7 +94,11 @@ function firstEnvValue(keys) {
|
|||
return firstEnvEntry(keys)?.value ?? "";
|
||||
}
|
||||
/** Read gateway config from env. Returns null when SF_LLM_GATEWAY_KEY is
|
||||
* missing — the gateway path is opt-in and silently absent otherwise. */
|
||||
* missing — the gateway path is opt-in and silently absent otherwise.
|
||||
*
|
||||
* `queryInstruction` is the Qwen3-style task instruction prepended to query
|
||||
* texts during retrieval. Document texts (backfill) are sent without it.
|
||||
* Override via SF_LLM_GATEWAY_EMBED_QUERY_INSTRUCTION. */
|
||||
export function loadGatewayConfigFromEnv() {
|
||||
const keyEntry = firstEnvEntry(KEY_ALIASES);
|
||||
if (!keyEntry) return null;
|
||||
|
|
@ -59,6 +107,8 @@ export function loadGatewayConfigFromEnv() {
|
|||
const embeddingModel =
|
||||
firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL;
|
||||
const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL;
|
||||
const queryInstruction =
|
||||
firstEnvValue([ENV_EMBED_QUERY_INSTRUCTION]) || DEFAULT_QUERY_INSTRUCTION;
|
||||
return {
|
||||
url,
|
||||
apiKey: keyEntry.value,
|
||||
|
|
@ -66,45 +116,70 @@ export function loadGatewayConfigFromEnv() {
|
|||
urlSource: urlEntry?.key ?? "default",
|
||||
embeddingModel,
|
||||
rerankModel,
|
||||
queryInstruction,
|
||||
};
|
||||
}
|
||||
/** Build an EmbedFn that posts to <url>/embeddings with Bearer auth.
|
||||
* Returns Float32Array[] in the same order as the input. Throws on HTTP
|
||||
* errors so the caller (embedMemories) logs and counts as zero. */
|
||||
export function createGatewayEmbedFn(config) {
|
||||
* errors so the caller (embedMemories) logs and counts as zero.
|
||||
* A circuit breaker short-circuits to [] after EMBED_CIRCUIT_THRESHOLD
|
||||
* consecutive failures so a down/cold gateway never stalls the caller for
|
||||
* the full 30 s timeout on every call.
|
||||
*
|
||||
* `opts.instruction` — when set, included as the top-level `instruction`
|
||||
* field in the request body. Qwen3-Embedding uses this for asymmetric
|
||||
* retrieval: pass `config.queryInstruction` for query embeddings; omit for
|
||||
* document/memory backfill so passages land in the correct embedding region. */
|
||||
export function createGatewayEmbedFn(config, opts) {
|
||||
return async (texts) => {
|
||||
if (texts.length === 0) return [];
|
||||
// Circuit open — fail fast, no network call.
|
||||
if (embedCircuitIsOpen()) return [];
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(
|
||||
() => controller.abort(),
|
||||
config.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
||||
);
|
||||
try {
|
||||
const body = {
|
||||
model: config.embeddingModel,
|
||||
input: texts,
|
||||
};
|
||||
if (opts?.instruction) {
|
||||
body.instruction = opts.instruction;
|
||||
}
|
||||
const res = await fetch(`${config.url}/embeddings`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${config.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: config.embeddingModel,
|
||||
input: texts,
|
||||
}),
|
||||
body: JSON.stringify(body),
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (!res.ok) {
|
||||
const body = await res.text().catch(() => "");
|
||||
// Throw immediately — the outer catch handles onEmbedFailure once.
|
||||
throw new Error(
|
||||
`llm-gateway /embeddings ${res.status}: ${body.slice(0, 200)}`,
|
||||
);
|
||||
}
|
||||
const json = await res.json();
|
||||
if (!Array.isArray(json.data)) {
|
||||
// Throw — the outer catch handles onEmbedFailure once.
|
||||
throw new Error("llm-gateway /embeddings: missing data array");
|
||||
}
|
||||
// Sort by index to handle out-of-order responses defensively.
|
||||
const sorted = [...json.data].sort((a, b) => a.index - b.index);
|
||||
return sorted.map((d) => Float32Array.from(d.embedding));
|
||||
const result = sorted.map((d) => Float32Array.from(d.embedding));
|
||||
onEmbedSuccess();
|
||||
return result;
|
||||
} catch (err) {
|
||||
// Catch AbortError (timeout) and all thrown errors from above — all
|
||||
// count as a circuit failure. onEmbedFailure is called exactly once
|
||||
// per failed request regardless of failure mode.
|
||||
onEmbedFailure();
|
||||
throw err;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -236,7 +236,12 @@ export function rankMemoriesByEmbedding(
|
|||
.sort((a, b) => b.combinedScore - a.combinedScore);
|
||||
}
|
||||
/** Embed `query` via the configured gateway and return its Float32Array, or
|
||||
* null when no gateway is configured / the embed call fails. Best-effort. */
|
||||
* null when no gateway is configured / the embed call fails. Best-effort.
|
||||
*
|
||||
* Passes `config.queryInstruction` so Qwen3-Embedding projects the query into
|
||||
* the correct asymmetric retrieval region of the embedding space. Document
|
||||
* embeddings (backfill) are created without an instruction — the two must be
|
||||
* consistent for cosine similarity to be meaningful. */
|
||||
export async function embedQueryViaGateway(query) {
|
||||
if (!query.trim()) return null;
|
||||
try {
|
||||
|
|
@ -245,7 +250,9 @@ export async function embedQueryViaGateway(query) {
|
|||
);
|
||||
const cfg = loadGatewayConfigFromEnv();
|
||||
if (!cfg) return null;
|
||||
const embedFn = createGatewayEmbedFn(cfg);
|
||||
const embedFn = createGatewayEmbedFn(cfg, {
|
||||
instruction: cfg.queryInstruction,
|
||||
});
|
||||
const vectors = await embedFn([query]);
|
||||
return vectors[0] ?? null;
|
||||
} catch (err) {
|
||||
|
|
|
|||
|
|
@ -29,6 +29,128 @@ const FLUSH_RETRY_MAX = 3;
|
|||
const FLUSH_RETRY_BASE_MS = 1000;
|
||||
// Valid metric name: letters, digits, underscores, colons; must not start
// with a digit (Prometheus-style naming).
const METRIC_NAME_PATTERN = /^[a-zA-Z_:][a-zA-Z0-9_:]*$/;

// ─── Metrics System Performance Monitoring ──────────────────────────────────
// Module-level counters tracking the health of the metrics subsystem itself.
// Reset on fresh init (initMetricsCentral) and reported via
// getMetricsSystemStats().

let _metricsSystemStartTime = Date.now(); // epoch ms when this module was (re)initialized
let _flushCount = 0; // total flush attempts (success + failure)
let _flushSuccessCount = 0; // flushes that completed without throwing
let _flushFailureCount = 0; // flushes that threw
let _lastFlushDuration = 0; // duration of the most recent successful flush (ms)
let _lastFlushTimestamp = 0; // epoch ms of the most recent successful flush
let _totalFlushDuration = 0; // cumulative successful-flush duration (ms), for averaging
|
||||
|
||||
/**
 * Snapshot the metrics subsystem's own health counters.
 *
 * Returns uptime, flush attempt/success/failure counts, a formatted success
 * rate, last/average flush durations, and whether a DB adapter is attached.
 * Pure read — does not mutate any counter.
 */
export function getMetricsSystemStats() {
  const uptimeMs = Date.now() - _metricsSystemStartTime;
  const successRate =
    _flushCount > 0
      ? `${((_flushSuccessCount / _flushCount) * 100).toFixed(1)}%`
      : "0%";
  const averageFlushDuration =
    _flushSuccessCount > 0
      ? Math.round(_totalFlushDuration / _flushSuccessCount)
      : 0;
  return {
    uptimeMs,
    uptimeSeconds: Math.floor(uptimeMs / 1000),
    flushCount: _flushCount,
    flushSuccessCount: _flushSuccessCount,
    flushFailureCount: _flushFailureCount,
    successRate,
    lastFlushDuration: _lastFlushDuration,
    lastFlushTimestamp: _lastFlushTimestamp,
    averageFlushDuration,
    databaseStatus: _dbAdapter ? "connected" : "disconnected",
  };
}
|
||||
|
||||
/**
 * Build a dashboard-style summary of key performance indicators.
 *
 * Combines the metrics subsystem's own health stats with cost, token,
 * latency, error, and resource readings pulled from the live registry.
 */
export function getSystemPerformanceDashboard() {
  const stats = getMetricsSystemStats();
  const reg = getRegistry();
  // Small readers over the registry keep the literal below declarative.
  const total = (name) => extractMetricValue(reg, name);
  const mean = (name) => extractMetricHistogramMean(reg, name);
  const gauge = (name) => extractMetricGaugeValue(reg, name);

  return {
    uptime: stats.uptimeSeconds,
    metricsSystemHealth: {
      status: stats.databaseStatus,
      successRate: stats.successRate,
      flushCount: stats.flushCount,
      averageFlushDuration: `${stats.averageFlushDuration}ms`,
    },
    cost: total("sf_cost_total"),
    tokens: {
      input: total("sf_tokens_input_total"),
      output: total("sf_tokens_output_total"),
    },
    performance: {
      averageToolExecution: mean("sf_tool_execution_duration_ms"),
      averageModelRequest: mean("sf_model_request_duration_ms"),
      averageDatabaseQuery: mean("sf_database_query_duration_ms"),
    },
    errors: {
      tool: total("sf_tool_errors_total"),
      model: total("sf_model_errors_total"),
      database: total("sf_database_errors_total"),
      system: total("sf_system_warnings_total"),
    },
    resources: {
      activeSessions: gauge("sf_active_sessions_count"),
      activeAgents: gauge("sf_active_agents_count"),
      concurrentToolCalls: gauge("sf_concurrent_tool_calls"),
    },
  };
}
|
||||
|
||||
/**
 * Sum a counter metric across all of its label combinations.
 * Returns 0 when the counter has never been registered.
 */
function extractMetricValue(registry, metricName) {
  const counter = registry.counters.get(metricName);
  if (!counter) return 0;
  return [...counter.values.values()].reduce((acc, v) => acc + v, 0);
}
|
||||
|
||||
/**
 * Mean observed value of a histogram metric, rounded to the nearest integer.
 * Returns 0 when the histogram is absent or has no observations.
 */
function extractMetricHistogramMean(registry, metricName) {
  const hist = registry.histograms.get(metricName);
  return hist && hist.count !== 0 ? Math.round(hist.sum / hist.count) : 0;
}
|
||||
|
||||
/**
 * Latest recorded value of a gauge metric, or 0 when the gauge is absent or
 * holds no values.
 *
 * Returns the last entry in the values Map's iteration order.
 * NOTE(review): Map order reflects first insertion per key — a re-`set` key
 * keeps its original position, so "last" is not necessarily the most recent
 * write when label sets repeat; confirm this matches the intended semantics.
 */
function extractMetricGaugeValue(registry, metricName) {
  const gauge = registry.gauges.get(metricName);
  if (!gauge || gauge.values.size === 0) return 0;
  let last;
  for (const v of gauge.values.values()) last = v;
  return last ?? 0;
}
|
||||
|
||||
// ─── Metric Types ───────────────────────────────────────────────────────────
|
||||
|
||||
class Counter {
|
||||
|
|
@ -266,6 +388,7 @@ class MetricsRegistry {
|
|||
|
||||
let _registry = null;
|
||||
let _flushTimer = null;
|
||||
let _metricsHealthTimer = null;
|
||||
let _basePath = "";
|
||||
let _sessionId = "";
|
||||
let _dbAdapter = null;
|
||||
|
|
@ -318,7 +441,7 @@ function persistMetricsToDb(registry, sessionId, db) {
|
|||
);
|
||||
for (const c of registry.counters.values()) {
|
||||
for (const [key, value] of c.values) {
|
||||
const labels = c._parseKey(key);
|
||||
const labels = _parseLabelKey(key);
|
||||
insert.run(
|
||||
c.name,
|
||||
"counter",
|
||||
|
|
@ -331,7 +454,7 @@ function persistMetricsToDb(registry, sessionId, db) {
|
|||
}
|
||||
for (const g of registry.gauges.values()) {
|
||||
for (const [key, value] of g.values) {
|
||||
const labels = g._parseKey(key);
|
||||
const labels = _parseLabelKey(key);
|
||||
insert.run(
|
||||
g.name,
|
||||
"gauge",
|
||||
|
|
@ -361,6 +484,10 @@ function persistMetricsToDb(registry, sessionId, db) {
|
|||
|
||||
function flushMetrics() {
|
||||
if (!_basePath) return;
|
||||
|
||||
const flushStartTime = Date.now();
|
||||
_flushCount++;
|
||||
|
||||
try {
|
||||
const text = getRegistry().buildText();
|
||||
const path = metricsFilePath(_basePath);
|
||||
|
|
@ -370,8 +497,35 @@ function flushMetrics() {
|
|||
if (_dbAdapter) {
|
||||
persistMetricsToDb(getRegistry(), _sessionId, _dbAdapter);
|
||||
}
|
||||
|
||||
// Update performance metrics
|
||||
_flushSuccessCount++;
|
||||
_lastFlushDuration = Date.now() - flushStartTime;
|
||||
_lastFlushTimestamp = Date.now();
|
||||
_totalFlushDuration += _lastFlushDuration;
|
||||
_flushFailures = 0;
|
||||
|
||||
// Record flush performance metrics
|
||||
try {
|
||||
getRegistry()
|
||||
.counter(
|
||||
"sf_metrics_flush_success_total",
|
||||
"Total successful metrics flushes",
|
||||
[],
|
||||
)
|
||||
.inc({}, 1);
|
||||
getRegistry()
|
||||
.gauge(
|
||||
"sf_metrics_flush_duration_ms",
|
||||
"Duration of last metrics flush in milliseconds",
|
||||
[],
|
||||
)
|
||||
.set({}, _lastFlushDuration);
|
||||
} catch {
|
||||
// Best effort - don't let metrics recording break the flush
|
||||
}
|
||||
} catch (err) {
|
||||
_flushFailureCount++;
|
||||
_flushFailures++;
|
||||
logWarning(
|
||||
"metrics-central",
|
||||
|
|
@ -411,6 +565,17 @@ export function initMetricsCentral(basePath, opts = {}) {
|
|||
_dbAdapter = opts.dbAdapter ?? null;
|
||||
const interval = opts.flushIntervalMs ?? FLUSH_INTERVAL_MS;
|
||||
|
||||
// Reset metrics system stats on fresh init
|
||||
if (!_flushTimer) {
|
||||
_metricsSystemStartTime = Date.now();
|
||||
_flushCount = 0;
|
||||
_flushSuccessCount = 0;
|
||||
_flushFailureCount = 0;
|
||||
_lastFlushDuration = 0;
|
||||
_lastFlushTimestamp = 0;
|
||||
_totalFlushDuration = 0;
|
||||
}
|
||||
|
||||
if (_flushTimer) clearInterval(_flushTimer);
|
||||
_flushTimer = setInterval(flushMetrics, interval);
|
||||
|
||||
|
|
@ -421,6 +586,64 @@ export function initMetricsCentral(basePath, opts = {}) {
|
|||
if (_dbAdapter) {
|
||||
ensureMetricsTable(_dbAdapter);
|
||||
}
|
||||
|
||||
// Start periodic metrics system health reporting
|
||||
if (!_metricsHealthTimer) {
|
||||
_metricsHealthTimer = setInterval(() => {
|
||||
try {
|
||||
updateMetricsSystemHealth();
|
||||
} catch {
|
||||
// Non-fatal
|
||||
}
|
||||
}, 300000); // Every 5 minutes
|
||||
if (_metricsHealthTimer.unref) _metricsHealthTimer.unref();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Refresh the self-monitoring gauges for the metrics subsystem: uptime,
 * database connectivity, and the number of metric series held in memory.
 * Failures are logged as warnings and never propagate to the caller.
 */
function updateMetricsSystemHealth() {
  const registry = getRegistry();
  try {
    const uptimeSeconds = Math.floor(
      (Date.now() - _metricsSystemStartTime) / 1000,
    );
    registry
      .gauge(
        "sf_metrics_system_uptime_seconds",
        "Metrics system uptime in seconds",
        [],
      )
      .set({}, uptimeSeconds);

    registry
      .gauge(
        "sf_metrics_database_status",
        "Database connection status (1=connected, 0=disconnected)",
        ["project_path"],
      )
      .set({ project_path: _basePath || "unknown" }, _dbAdapter ? 1 : 0);

    // In-memory series count across all three metric families.
    const activeCount =
      registry.counters.size + registry.gauges.size + registry.histograms.size;
    registry
      .gauge(
        "sf_metrics_active_count",
        "Number of active metrics in memory",
        [],
      )
      .set({}, activeCount);
  } catch (err) {
    logWarning(
      "metrics-central",
      `Failed to update health metrics: ${err.message}`,
    );
  }
}
|
||||
|
||||
/**
|
||||
|
|
@ -431,6 +654,10 @@ export function stopMetricsCentral() {
|
|||
clearInterval(_flushTimer);
|
||||
_flushTimer = null;
|
||||
}
|
||||
if (_metricsHealthTimer) {
|
||||
clearInterval(_metricsHealthTimer);
|
||||
_metricsHealthTimer = null;
|
||||
}
|
||||
// Final flush attempt
|
||||
flushMetrics();
|
||||
_basePath = "";
|
||||
|
|
@ -511,6 +738,112 @@ export function recordCost(
|
|||
recordGauge("sf_cost_last", cost, { unit_id: unitId, model_id: modelId });
|
||||
}
|
||||
|
||||
/**
 * Record one tool execution: always logs its duration into the
 * sf_tool_execution_duration_ms histogram; on failure additionally bumps
 * sf_tool_errors_total labelled by tool name and error type.
 *
 * @param {string} toolName — name of the tool
 * @param {number} durationMs — execution duration in milliseconds
 * @param {boolean} [isError] — whether the execution resulted in an error
 * @param {string} [errorType] — type of error if isError is true
 */
export function recordToolExecution(
  toolName,
  durationMs,
  isError = false,
  errorType = "",
) {
  recordHistogram("sf_tool_execution_duration_ms", durationMs);
  if (!isError) return;
  const labels = { tool_name: toolName, error_type: errorType || "unknown" };
  recordCounter("sf_tool_errors_total", labels, 1);
}
|
||||
|
||||
/**
 * Record one model request: always logs its duration into the
 * sf_model_request_duration_ms histogram; on failure additionally bumps
 * sf_model_errors_total labelled by model id and error type.
 *
 * @param {string} modelId — model identifier
 * @param {number} durationMs — request duration in milliseconds
 * @param {boolean} [isError] — whether the request resulted in an error
 * @param {string} [errorType] — type of error if isError is true
 */
export function recordModelRequest(
  modelId,
  durationMs,
  isError = false,
  errorType = "",
) {
  recordHistogram("sf_model_request_duration_ms", durationMs);
  if (!isError) return;
  const labels = { model_id: modelId, error_type: errorType || "unknown" };
  recordCounter("sf_model_errors_total", labels, 1);
}
|
||||
|
||||
/**
 * Record one database operation: always logs its duration into the
 * sf_database_query_duration_ms histogram; on failure additionally bumps
 * sf_database_errors_total labelled by operation and error type.
 *
 * @param {string} operation — database operation name
 * @param {number} durationMs — query duration in milliseconds
 * @param {boolean} [isError] — whether the operation resulted in an error
 * @param {string} [errorType] — type of error if isError is true
 */
export function recordDatabaseOperation(
  operation,
  durationMs,
  isError = false,
  errorType = "",
) {
  recordHistogram("sf_database_query_duration_ms", durationMs);
  if (!isError) return;
  const labels = { operation, error_type: errorType || "unknown" };
  recordCounter("sf_database_errors_total", labels, 1);
}
|
||||
|
||||
/**
 * Count a system warning in sf_system_warnings_total, labelled by the
 * emitting component and the warning type.
 *
 * @param {string} component — system component that issued the warning
 * @param {string} warningType — type of warning
 */
export function recordSystemWarning(component, warningType) {
  const labels = { component, warning_type: warningType };
  recordCounter("sf_system_warnings_total", labels, 1);
}
|
||||
|
||||
/**
 * Push resource-usage readings into their gauges. Only the fields present on
 * `resources` are written; absent fields leave their gauges untouched.
 *
 * @param {object} resources — resource usage data
 * @param {number} [resources.activeSessions] — number of active sessions
 * @param {number} [resources.activeAgents] — number of active agents
 * @param {number} [resources.concurrentToolCalls] — number of concurrent tool calls
 */
export function updateResourceGauges(resources = {}) {
  const readings = [
    ["sf_active_sessions_count", resources.activeSessions],
    ["sf_active_agents_count", resources.activeAgents],
    ["sf_concurrent_tool_calls", resources.concurrentToolCalls],
  ];
  for (const [metric, value] of readings) {
    if (value !== undefined) recordGauge(metric, value);
  }
}
|
||||
|
||||
/**
|
||||
* Get current metrics text in Prometheus format.
|
||||
*/
|
||||
|
|
@ -673,10 +1006,70 @@ const METRIC_META = {
|
|||
labels: ["unit_id", "model_id"],
|
||||
},
|
||||
|
||||
// Performance tracking
|
||||
sf_session_start_duration_ms: {
|
||||
help: "Session start duration in milliseconds",
|
||||
buckets: [100, 250, 500, 1000, 2000, 5000],
|
||||
},
|
||||
sf_tool_execution_duration_ms: {
|
||||
help: "Tool execution duration in milliseconds",
|
||||
buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000],
|
||||
},
|
||||
sf_model_request_duration_ms: {
|
||||
help: "Model request duration in milliseconds",
|
||||
buckets: [100, 500, 1000, 2500, 5000, 10000, 30000, 60000],
|
||||
},
|
||||
sf_database_query_duration_ms: {
|
||||
help: "Database query duration in milliseconds",
|
||||
buckets: [1, 5, 10, 25, 50, 100, 250, 500],
|
||||
},
|
||||
|
||||
// Resource usage
|
||||
sf_active_sessions_count: {
|
||||
help: "Number of active sessions",
|
||||
},
|
||||
sf_active_agents_count: {
|
||||
help: "Number of active agents",
|
||||
},
|
||||
sf_concurrent_tool_calls: {
|
||||
help: "Number of concurrent tool calls",
|
||||
},
|
||||
|
||||
// Error tracking
|
||||
sf_tool_errors_total: {
|
||||
help: "Total tool execution errors",
|
||||
labels: ["tool_name", "error_type"],
|
||||
},
|
||||
sf_model_errors_total: {
|
||||
help: "Total model request errors",
|
||||
labels: ["model_id", "error_type"],
|
||||
},
|
||||
sf_database_errors_total: {
|
||||
help: "Total database operation errors",
|
||||
labels: ["operation", "error_type"],
|
||||
},
|
||||
sf_system_warnings_total: {
|
||||
help: "Total system warnings",
|
||||
labels: ["component", "warning_type"],
|
||||
},
|
||||
|
||||
// Internal
|
||||
sf_metrics_flush_failed_total: {
|
||||
help: "Total metrics flush failures",
|
||||
},
|
||||
sf_metrics_flush_success_total: {
|
||||
help: "Total successful metrics flushes",
|
||||
},
|
||||
sf_metrics_flush_duration_ms: {
|
||||
help: "Duration of last metrics flush in milliseconds",
|
||||
},
|
||||
sf_metrics_system_uptime_seconds: {
|
||||
help: "Metrics system uptime in seconds",
|
||||
},
|
||||
sf_metrics_database_status: {
|
||||
help: "Database connection status (1=connected, 0=disconnected)",
|
||||
labels: ["project_path"],
|
||||
},
|
||||
};
|
||||
|
||||
function getMetricMeta(name) {
|
||||
|
|
|
|||
266
src/resources/extensions/sf/session-recorder.js
Normal file
266
src/resources/extensions/sf/session-recorder.js
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
// session-recorder.js — per-process session lifecycle management
|
||||
//
|
||||
// Maintains in-memory state for the active session (id, turn index, pending
|
||||
// turn row id) and dispatches writes to sf-db.js on hook events. Keeping
|
||||
// this state here — not in register-hooks.js — lets it be reset cleanly
|
||||
// across session_switch events and tested in isolation.
|
||||
//
|
||||
// Purpose: bridge the Pi/Copilot hook lifecycle into the sf.db session layer
|
||||
// so every user↔assistant exchange is persisted as a searchable turns row,
|
||||
// promoted into memory_sources for future LLM extraction + vector embedding,
|
||||
// and cross-session learning has the structural inputs it needs.
|
||||
//
|
||||
// Consumer: register-hooks.js session_start, before_agent_start, agent_end,
|
||||
// tool_call, session_shutdown hooks.
|
||||
|
||||
import { createMemorySource } from "./memory-source-store.js";
|
||||
import {
|
||||
insertSessionTurn,
|
||||
patchTurnResponse,
|
||||
recordSessionFileTouch,
|
||||
recordSessionRef,
|
||||
upsertSession,
|
||||
} from "./sf-db.js";
|
||||
|
||||
// ── Write-class tools whose input.path counts as a file touch ──────────────
// Only these tool names cause recordFileTouch to persist a
// session_file_touches row; all other (read-only) tools are ignored.
const WRITE_TOOL_NAMES = new Set([
  "edit_file",
  "create_file",
  "write_file",
  "str_replace_editor",
  "str_replace_based_edit_tool",
  "rewrite_file",
  "insert_content",
  "delete_file",
]);

// ── Ref patterns extracted from turn text ──────────────────────────────────
// Each entry pairs a ref type with a global regex; extractAndRecordRefs runs
// every pattern over the turn text and records each match as a session ref.
// The first non-empty capture group is the ref value.
const REF_PATTERNS = [
  { type: "pr", re: /\bPR\s*#(\d+)\b|\bpull.request[/ #]+(\d+)\b/gi },
  { type: "issue", re: /\bissue\s*#(\d+)\b|\bGH-(\d+)\b/gi },
  { type: "commit", re: /\b([0-9a-f]{7,40})\b/g },
  { type: "branch", re: /\borigin\/([^\s"'`]+)\b|\bbranch[: ]+([^\s"'`]+)/gi },
];

// ── Module-level active session state ─────────────────────────────────────
let _sessionId = null; // active session id, or null when no session is open
let _turnIndex = -1; // current turn slot; -1 until initSessionRecorder runs
let _pendingTurnId = null; // DB row id of the turn awaiting its assistant response
/** User message text captured in recordTurnStart — used in promoteTurnToMemorySource. */
let _pendingUserMessage = null;
|
||||
|
||||
/**
 * Clear all module-level session state back to its initial values. Called on
 * session_start and session_switch so each host session gets a clean slate.
 *
 * Purpose: prevent stale state from a previous session bleeding into the next.
 * Consumer: initSessionRecorder, session_switch handler.
 */
export function resetSessionRecorder() {
  _pendingUserMessage = null;
  _pendingTurnId = null;
  _turnIndex = -1;
  _sessionId = null;
}
|
||||
|
||||
/**
 * Begin recording a new session: clears prior state, stores the session id,
 * and upserts the sessions row. Safe to call multiple times — upsertSession
 * is idempotent. A falsy sessionId leaves the recorder inactive.
 *
 * Purpose: establish the session row that all subsequent turns hang off.
 * Consumer: register-hooks.js session_start.
 */
export function initSessionRecorder(
  sessionId,
  { mode, cwd, repo, branch } = {},
) {
  resetSessionRecorder();
  if (!sessionId) return;
  _sessionId = sessionId;
  _turnIndex = 0;
  const row = {
    sessionId,
    mode: mode ?? "interactive",
    cwd: cwd ?? process.cwd(),
    repo,
    branch,
  };
  upsertSession(row);
}
|
||||
|
||||
/**
 * Persist the start of a turn as soon as the user message arrives, so the
 * row exists even if agent_end never fires (crash or interrupt). Also scans
 * the message for PR/issue/commit/branch ref mentions.
 *
 * Returns the DB row id so tool-call handlers can link touches to this turn,
 * or null when no session is active.
 *
 * Purpose: store the user's intent before the agent processes it.
 * Consumer: register-hooks.js before_agent_start.
 */
export function recordTurnStart(userMessage) {
  if (!_sessionId) return null;
  const message = userMessage ?? null;
  const rowId = insertSessionTurn({
    sessionId: _sessionId,
    turnIndex: _turnIndex,
    userMessage: message,
    ts: new Date().toISOString(),
  });
  _pendingTurnId = rowId;
  _pendingUserMessage = message;
  if (message) extractAndRecordRefs(message, rowId);
  return rowId;
}
|
||||
|
||||
/**
 * Complete the pending turn: patch in the assistant response, scan it for
 * refs, promote the full exchange into memory_sources (for `/memory rebuild`
 * LLM extraction and vector-embedding backfill), then advance the turn index
 * so the next turn gets a fresh slot.
 *
 * Purpose: complete the turn record so both halves are searchable as a unit,
 * and make the raw turn text a first-class memory source for retrieval.
 * Consumer: register-hooks.js agent_end.
 */
export function recordTurnEnd(assistantResponse) {
  if (!_sessionId) return;
  const turn = _turnIndex;
  const pendingId = _pendingTurnId;
  const userMessage = _pendingUserMessage;
  if (assistantResponse) {
    patchTurnResponse(_sessionId, turn, assistantResponse);
    extractAndRecordRefs(assistantResponse, pendingId);
  }
  // Both halves are available here: the user message was captured in
  // recordTurnStart and the assistant text arrives now.
  promoteTurnToMemorySource(turn, userMessage, assistantResponse);
  _pendingTurnId = null;
  _pendingUserMessage = null;
  _turnIndex = turn + 1;
}
|
||||
|
||||
/**
 * Persist a file-touch record for the current session. No-op when no session
 * is active, the path is missing, or the tool is not one of the write-class
 * tools in WRITE_TOOL_NAMES.
 *
 * Purpose: build the session_file_touches index without requiring a full
 * audit-event scan.
 *
 * Consumer: register-hooks.js tool_call.
 */
export function recordFileTouch(toolName, filePath) {
  const shouldRecord =
    Boolean(_sessionId) && Boolean(filePath) && WRITE_TOOL_NAMES.has(toolName);
  if (!shouldRecord) return;
  recordSessionFileTouch({
    sessionId: _sessionId,
    path: filePath,
    toolName,
    turnId: _pendingTurnId,
    firstSeenAt: new Date().toISOString(),
  });
}
|
||||
|
||||
/**
 * Store the latest compaction summary on the sessions row and mirror it into
 * memory_sources (kind="session") so `/memory rebuild` can extract durable
 * knowledge from it without an LLM call at compaction time. The
 * [session:<id>] prefix enables idempotent detection on re-compact.
 *
 * Purpose: give memory retrieval a compact description of each session's work.
 * Consumer: register-hooks.js session_before_compact result.
 */
export function updateSessionSummary(summary) {
  if (!_sessionId || !summary) return;
  upsertSession({ sessionId: _sessionId, summary });
  try {
    createMemorySource({
      kind: "session",
      uri: null,
      title: `Session ${_sessionId.slice(0, 8)} summary`,
      content: `[session:${_sessionId}] ${summary.slice(0, 1000)}`,
      scope: "project",
      tags: ["session", "summary"],
    });
  } catch {
    /* non-fatal: memory source creation must never block compaction */
  }
}
|
||||
|
||||
/**
 * Back-fill the session's repo and branch once git context becomes available
 * after initial startup (common when cwd is set before the DB is open).
 *
 * Purpose: back-fill attribution data so session→repo queries work.
 * Consumer: register-hooks.js after ensureDbOpen resolves.
 */
export function patchSessionGitContext(repo, branch) {
  if (!_sessionId) return;
  const patch = {
    sessionId: _sessionId,
    repo: repo ?? null,
    branch: branch ?? null,
  };
  upsertSession(patch);
}
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Synthesize a memory_sources entry from a completed turn so the conversation
 * text feeds the memory pipeline (`/memory rebuild` LLM extraction, embedding
 * via runEmbeddingBackfill).
 *
 * Content format: `[turn:<session_id>:<turn_index>] Q: <user_msg> A: <resp>`
 * The structured prefix enables idempotent detection across rebuilds.
 *
 * Silently no-ops when both halves of the turn are blank, when no session is
 * initialised, or when createMemorySource throws — turn recording must never
 * block.
 */
function promoteTurnToMemorySource(turnIndex, userMessage, assistantResponse) {
  if (!_sessionId) return;
  const question = (userMessage ?? "").trim();
  const answer = (assistantResponse ?? "").trim();
  if (!question && !answer) return;
  const pieces = [`[turn:${_sessionId}:${turnIndex}]`];
  if (question) pieces.push(`Q: ${question.slice(0, 400)}`);
  if (answer) pieces.push(`A: ${answer.slice(0, 400)}`);
  try {
    createMemorySource({
      kind: "turn",
      uri: null,
      title: `Turn ${_sessionId.slice(0, 8)}:${turnIndex}`,
      content: pieces.join(" "),
      scope: "project",
      tags: ["turn", "session"],
    });
  } catch {
    /* non-fatal */
  }
}
|
||||
|
||||
/**
 * Scan `text` for PR/issue/commit/branch mentions (REF_PATTERNS) and persist
 * each match as a session ref row linked to `turnId`. No-op when no session
 * is active or the text is empty.
 *
 * The REF_PATTERNS regexes are module-level and carry the `g` flag, so
 * `lastIndex` is reset before each scan to keep calls independent.
 *
 * NOTE(review): the commit pattern matches any 7-40 char [0-9a-f] word, so
 * long decimal numbers (timestamps, ids) can be recorded as commit refs —
 * consider requiring at least one a-f character if that proves noisy.
 */
function extractAndRecordRefs(text, turnId) {
  if (!_sessionId || !text) return;
  const now = new Date().toISOString();
  for (const { type, re } of REF_PATTERNS) {
    // Shared global regex: reset scan position so prior calls don't leak in.
    re.lastIndex = 0;
    let m;
    while ((m = re.exec(text)) !== null) {
      const value = (m[1] ?? m[2] ?? "").trim();
      if (!value) continue;
      // (The previous `value.length < 7` guard for commits was unreachable:
      // the commit regex already requires at least 7 characters.)
      recordSessionRef({
        sessionId: _sessionId,
        refType: type,
        refValue: value,
        turnId: turnId ?? null,
        createdAt: now,
      });
    }
  }
}
|
||||
|
|
@ -18,8 +18,18 @@
|
|||
// The separate `.sf/unit-claims.db` managed by `unit-ownership.ts` is an
|
||||
// intentionally independent store for cross-worktree claim races and is
|
||||
// excluded from this invariant.
|
||||
import { copyFileSync, existsSync, mkdirSync, realpathSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
import {
|
||||
copyFileSync,
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
realpathSync,
|
||||
statSync,
|
||||
unlinkSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { DatabaseSync } from "node:sqlite";
|
||||
import { SF_STALE_STATE, SFError } from "./errors.js";
|
||||
import { getGateIdsForTurn } from "./gate-registry.js";
|
||||
|
|
@ -48,6 +58,9 @@ function normalizeRows(rows) {
|
|||
return rows.map((r) => normalizeRow(r));
|
||||
}
|
||||
// DB maintenance tuning knobs.
const DB_QUERY_TIMEOUT_MS = 30_000; // per-query timeout budget (30 s) — presumably enforced by the adapter; confirm
const DB_BACKUP_MIN_INTERVAL_MS = 15 * 60 * 1000; // at most one backup every 15 minutes
const DB_BACKUP_RETENTION = 24; // newest backups kept by pruneDatabaseBackups
const DB_FULL_VACUUM_MIN_INTERVAL_MS = 6 * 60 * 60 * 1000; // full vacuum at most every 6 hours
|
||||
|
||||
function createAdapter(rawDb) {
|
||||
const db = rawDb;
|
||||
|
|
@ -114,7 +127,124 @@ function openRawDb(path) {
|
|||
loadProvider();
|
||||
return new DatabaseSync(path);
|
||||
}
|
||||
const SCHEMA_VERSION = 47;
|
||||
/**
 * Quote a value as a SQLite single-quoted string literal, doubling any
 * embedded single quotes per SQL escaping rules.
 */
function sqliteStringLiteral(value) {
  const escaped = String(value).split("'").join("''");
  return "'" + escaped + "'";
}
|
||||
/**
 * Directory where periodic database snapshots live, relative to the
 * database file: <db dir>/backups/db.
 */
function databaseBackupDir(path) {
  const parent = dirname(path);
  return join(parent, "backups", "db");
}
|
||||
/**
 * Newest mtime (ms since epoch) among snapshot files ("sf.db.*") in `dir`,
 * or 0 when the directory or any snapshot files are absent.
 */
function latestDatabaseBackupMtime(dir) {
  if (!existsSync(dir)) return 0;
  let newest = 0;
  const snapshotNames = readdirSync(dir).filter((name) =>
    name.startsWith("sf.db."),
  );
  for (const name of snapshotNames) {
    try {
      const info = statSync(join(dir, name));
      if (info.isFile()) newest = Math.max(newest, info.mtimeMs);
    } catch {
      // A snapshot may be unlinked concurrently by pruning; skip it.
    }
  }
  return newest;
}
|
||||
/**
 * Enforce the snapshot retention cap: keep the DB_BACKUP_RETENTION newest
 * "sf.db.*" files in `dir` and delete the rest. Every failure is swallowed —
 * retention is best-effort and must never block opening the database.
 */
function pruneDatabaseBackups(dir) {
  if (!existsSync(dir)) return;
  const snapshots = [];
  const candidates = readdirSync(dir).filter((n) => n.startsWith("sf.db."));
  for (const name of candidates) {
    const file = join(dir, name);
    try {
      const info = statSync(file);
      if (info.isFile()) snapshots.push({ file, mtimeMs: info.mtimeMs });
    } catch {
      // Entry vanished mid-scan (concurrent prune); ignore.
    }
  }
  // Newest first; everything past the retention cap is deleted.
  snapshots.sort((a, b) => b.mtimeMs - a.mtimeMs);
  const excess = snapshots.slice(DB_BACKUP_RETENTION);
  for (const { file } of excess) {
    try {
      unlinkSync(file);
    } catch {
      // Best-effort retention; never block DB open on pruning.
    }
  }
}
|
||||
/**
 * Path of the JSON file that tracks maintenance metadata (e.g. the time of
 * the last full VACUUM), stored inside the backup directory.
 */
function databaseMaintenancePath(path) {
  const dir = databaseBackupDir(path);
  return join(dir, "maintenance.json");
}
|
||||
/**
 * Load the maintenance metadata JSON for the given database path.
 * Returns {} when the file is missing, unreadable, or malformed.
 */
function readDatabaseMaintenanceState(path) {
  try {
    const raw = readFileSync(databaseMaintenancePath(path), "utf-8");
    return JSON.parse(raw);
  } catch {
    return {};
  }
}
|
||||
/**
 * Persist maintenance metadata as pretty-printed JSON next to the backups.
 * Failures (including unserializable state) are swallowed: this metadata is
 * advisory only.
 */
function writeDatabaseMaintenanceState(path, state) {
  try {
    const target = databaseMaintenancePath(path);
    const body = JSON.stringify(state, null, 2) + "\n";
    writeFileSync(target, body, "utf-8");
  } catch {
    // Best-effort maintenance metadata.
  }
}
|
||||
/**
 * Take a periodic on-disk snapshot of the database via `VACUUM INTO`.
 *
 * Skipped entirely for in-memory databases and when SF_DB_BACKUP_DISABLE=1.
 * Rate-limited: no new snapshot is taken while the newest existing one is
 * younger than DB_BACKUP_MIN_INTERVAL_MS. Old snapshots are pruned after a
 * successful write. Any failure is logged and swallowed — snapshotting must
 * never block opening the database.
 *
 * @param rawDb  Open node:sqlite DatabaseSync handle.
 * @param path   Filesystem path of the database file (":memory:" to skip).
 */
function createDatabaseSnapshot(rawDb, path) {
  if (path === ":memory:" || process.env.SF_DB_BACKUP_DISABLE === "1") return;
  const dir = databaseBackupDir(path);
  try {
    mkdirSync(dir, { recursive: true });
    const latest = latestDatabaseBackupMtime(dir);
    // Throttle: skip when a sufficiently fresh snapshot already exists.
    if (latest > 0 && Date.now() - latest < DB_BACKUP_MIN_INTERVAL_MS) return;
    // Filesystem-safe timestamp: ":" and "." are not portable in filenames.
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    const backupPath = join(dir, `sf.db.${stamp}`);
    // VACUUM INTO writes a compacted copy of the live DB to backupPath;
    // the target path is embedded as an escaped SQL string literal.
    rawDb.exec(`VACUUM INTO ${sqliteStringLiteral(backupPath)}`);
    pruneDatabaseBackups(dir);
  } catch (err) {
    logWarning(
      "sf-db",
      `database snapshot failed: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
|
||||
/**
 * Run routine SQLite maintenance on open: integrity gate, WAL checkpoint,
 * query-planner statistics refresh, incremental vacuum, and — at most once
 * per DB_FULL_VACUUM_MIN_INTERVAL_MS — a full VACUUM tracked via the
 * maintenance metadata file. Skipped for in-memory databases and when
 * SF_DB_MAINTENANCE_DISABLE=1. All failures are logged and swallowed so
 * maintenance never blocks opening the database.
 *
 * NOTE: statement order is deliberate — quick_check gates everything so we
 * never VACUUM a corrupt file; the cheap pragmas run before the expensive
 * full VACUUM decision.
 *
 * @param rawDb  Open node:sqlite DatabaseSync handle.
 * @param path   Filesystem path of the database file (":memory:" to skip).
 */
function performDatabaseMaintenance(rawDb, path) {
  if (path === ":memory:" || process.env.SF_DB_MAINTENANCE_DISABLE === "1")
    return;
  try {
    // Integrity gate: do not touch a database that fails quick_check.
    const quickCheck = rawDb.prepare("PRAGMA quick_check").get();
    if (quickCheck?.quick_check !== "ok") {
      logWarning("sf-db", "database quick_check failed; skipping maintenance");
      return;
    }
    rawDb.exec("PRAGMA wal_checkpoint(PASSIVE)");
    rawDb.exec("PRAGMA optimize");
    // Reclaim up to 128 free pages without a full rewrite.
    rawDb.exec("PRAGMA incremental_vacuum(128)");

    // Full VACUUM is expensive; gate it on the persisted timestamp.
    const state = readDatabaseMaintenanceState(path);
    const lastFullVacuumAt =
      typeof state.lastFullVacuumAt === "string"
        ? Date.parse(state.lastFullVacuumAt)
        : 0;
    // NaN from an unparsable timestamp also triggers a vacuum (isFinite check).
    if (
      !Number.isFinite(lastFullVacuumAt) ||
      Date.now() - lastFullVacuumAt >= DB_FULL_VACUUM_MIN_INTERVAL_MS
    ) {
      rawDb.exec("VACUUM");
      writeDatabaseMaintenanceState(path, {
        ...state,
        lastFullVacuumAt: new Date().toISOString(),
      });
    }
  } catch (err) {
    logWarning(
      "sf-db",
      `database maintenance failed: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
|
||||
const SCHEMA_VERSION = 49;
|
||||
function indexExists(db, name) {
|
||||
return !!db
|
||||
.prepare(
|
||||
|
|
@ -269,6 +399,125 @@ function ensureSolverEvalTables(db) {
|
|||
"CREATE INDEX IF NOT EXISTS idx_solver_eval_case_false_complete ON solver_eval_case_results(false_complete, mode)",
|
||||
);
|
||||
}
|
||||
/**
 * Create the session-layer tables, FTS index, triggers, and indexes.
 * Idempotent (CREATE ... IF NOT EXISTS throughout), so it is safe to call
 * from both initSchema() on fresh DBs and the v48 migration on existing DBs.
 *
 * @param db  Open DB adapter exposing exec().
 */
function ensureSessionTables(db) {
  // One row per interactive/headless session; turns, file touches, and refs
  // all reference sessions(session_id) with ON DELETE CASCADE.
  db.exec(`
    CREATE TABLE IF NOT EXISTS sessions (
      session_id TEXT PRIMARY KEY,
      trace_id TEXT DEFAULT NULL,
      mode TEXT NOT NULL DEFAULT 'interactive',
      cwd TEXT NOT NULL DEFAULT '',
      repo TEXT DEFAULT NULL,
      branch TEXT DEFAULT NULL,
      summary TEXT DEFAULT NULL,
      summary_count INTEGER NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL
    )
  `);
  // One row per user/assistant exchange. UNIQUE(session_id, turn_index)
  // backs the upsert in insertSessionTurn.
  db.exec(`
    CREATE TABLE IF NOT EXISTS turns (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL REFERENCES sessions(session_id) ON DELETE CASCADE,
      turn_index INTEGER NOT NULL,
      user_message TEXT,
      assistant_response TEXT,
      ts TEXT NOT NULL,
      UNIQUE(session_id, turn_index)
    )
  `);
  // File paths touched per session, collapsed to one row per (session, path).
  db.exec(`
    CREATE TABLE IF NOT EXISTS session_file_touches (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL REFERENCES sessions(session_id) ON DELETE CASCADE,
      path TEXT NOT NULL,
      tool_name TEXT DEFAULT NULL,
      turn_id INTEGER DEFAULT NULL REFERENCES turns(id),
      first_seen_at TEXT NOT NULL,
      UNIQUE(session_id, path)
    )
  `);
  // PR / issue / commit / branch refs mentioned in a session; idempotent
  // inserts via UNIQUE(session_id, ref_type, ref_value).
  db.exec(`
    CREATE TABLE IF NOT EXISTS session_refs (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL REFERENCES sessions(session_id) ON DELETE CASCADE,
      ref_type TEXT NOT NULL,
      ref_value TEXT NOT NULL,
      turn_id INTEGER DEFAULT NULL REFERENCES turns(id),
      created_at TEXT NOT NULL,
      UNIQUE(session_id, ref_type, ref_value)
    )
  `);
  // FTS5 external-content table over turns for keyword recall.
  // content_rowid links to turns.id; triggers below keep it in sync.
  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS turns_fts USING fts5(
      user_message,
      assistant_response,
      content='turns',
      content_rowid='id'
    )
  `);
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS turns_fts_insert AFTER INSERT ON turns BEGIN
      INSERT INTO turns_fts(rowid, user_message, assistant_response)
      VALUES (new.id, new.user_message, new.assistant_response);
    END
  `);
  // External-content FTS: an update must first record a 'delete' entry for
  // the old row, then insert the new row.
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS turns_fts_update AFTER UPDATE ON turns BEGIN
      INSERT INTO turns_fts(turns_fts, rowid, user_message, assistant_response)
      VALUES ('delete', old.id, old.user_message, old.assistant_response);
      INSERT INTO turns_fts(rowid, user_message, assistant_response)
      VALUES (new.id, new.user_message, new.assistant_response);
    END
  `);
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS turns_fts_delete AFTER DELETE ON turns BEGIN
      INSERT INTO turns_fts(turns_fts, rowid, user_message, assistant_response)
      VALUES ('delete', old.id, old.user_message, old.assistant_response);
    END
  `);
  // Indexes backing recent-session listings, per-repo queries, turn joins,
  // and per-path / per-ref lookups.
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_sessions_created ON sessions(created_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_sessions_repo ON sessions(repo, created_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_turns_session ON turns(session_id, turn_index)",
  );
  db.exec("CREATE INDEX IF NOT EXISTS idx_turns_ts ON turns(ts DESC)");
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_session_file_touches_session ON session_file_touches(session_id, first_seen_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_session_file_touches_path ON session_file_touches(path, session_id)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_session_refs_session ON session_refs(session_id, created_at DESC)",
  );
}
|
||||
/**
 * Create the session_snapshots checkpoint table and its index (schema v49).
 * Idempotent; safe on fresh DBs and during migration alike.
 *
 * @param db  Open DB adapter exposing exec().
 */
function ensureSessionSnapshotTable(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS session_snapshots (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      -- Session that triggered this checkpoint. FK to sessions(session_id).
      session_id TEXT NOT NULL,
      -- Zero-based counter within the session (first snapshot = 0).
      snapshot_index INTEGER NOT NULL DEFAULT 0,
      -- Optional git stash ref so the snapshot can be restored exactly.
      -- NULL when the working tree had no changes to stash.
      git_stash_ref TEXT,
      -- Free-text label for the snapshot (e.g. "before migration deploy").
      label TEXT,
      ts TEXT NOT NULL,
      UNIQUE(session_id, snapshot_index)
    )
  `);
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_session_snapshots_session ON session_snapshots(session_id, snapshot_index)",
  );
}
|
||||
function ensureHeadlessRunTables(db) {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS headless_runs (
|
||||
|
|
@ -1038,6 +1287,8 @@ function initSchema(db, fileBacked) {
|
|||
ensureScheduleTables(db);
|
||||
ensureSolverEvalTables(db);
|
||||
ensureHeadlessRunTables(db);
|
||||
ensureSessionTables(db);
|
||||
ensureSessionSnapshotTable(db);
|
||||
ensureUokMessageTables(db);
|
||||
ensureSpecSchemaTables(db);
|
||||
ensureTaskFrontmatterColumns(db);
|
||||
|
|
@ -2592,9 +2843,7 @@ function migrateSchema(db) {
|
|||
.all()
|
||||
.map((c) => c.name);
|
||||
if (cols.includes("superseded_by")) {
|
||||
db.exec(
|
||||
"ALTER TABLE validation_runs DROP COLUMN superseded_by",
|
||||
);
|
||||
db.exec("ALTER TABLE validation_runs DROP COLUMN superseded_by");
|
||||
}
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
|
|
@ -2603,6 +2852,58 @@ function migrateSchema(db) {
|
|||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
if (currentVersion < 48) {
|
||||
// Session layer: create tables, backfill from existing headless_runs and
|
||||
// audit_turn_index so historical data is queryable from day one.
|
||||
// Message text will be NULL for backfilled turns — it was never stored.
|
||||
ensureSessionTables(db);
|
||||
// Backfill: one session per headless run.
|
||||
db.exec(`
|
||||
INSERT OR IGNORE INTO sessions (session_id, trace_id, mode, cwd, created_at, updated_at)
|
||||
SELECT run_id, NULL, 'headless', '', created_at, updated_at
|
||||
FROM headless_runs
|
||||
`);
|
||||
// Backfill: one session per distinct trace_id in audit_turn_index.
|
||||
// Reconstruct created_at/updated_at from the min/max timestamps.
|
||||
db.exec(`
|
||||
INSERT OR IGNORE INTO sessions (session_id, trace_id, mode, cwd, created_at, updated_at)
|
||||
SELECT trace_id, trace_id, 'interactive',
|
||||
'', MIN(first_ts), MAX(last_ts)
|
||||
FROM audit_turn_index
|
||||
GROUP BY trace_id
|
||||
`);
|
||||
// Backfill: one turn row per (trace_id, turn_id) in audit_turn_index.
|
||||
// turn_index derived from row order within trace; message text is NULL.
|
||||
db.exec(`
|
||||
INSERT OR IGNORE INTO turns (session_id, turn_index, user_message, assistant_response, ts)
|
||||
SELECT
|
||||
trace_id,
|
||||
ROW_NUMBER() OVER (PARTITION BY trace_id ORDER BY first_ts) - 1,
|
||||
NULL, NULL,
|
||||
first_ts
|
||||
FROM audit_turn_index
|
||||
`);
|
||||
// Rebuild FTS index from any turns that have text.
|
||||
// None from backfill yet, but required so the FTS table is consistent.
|
||||
db.exec(`INSERT INTO turns_fts(turns_fts) VALUES ('rebuild')`);
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
":version": 48,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
if (currentVersion < 49) {
|
||||
// Add session_snapshots table — checkpoints before irreversible ops.
|
||||
// Safe to call on fresh DBs too (CREATE TABLE IF NOT EXISTS).
|
||||
ensureSessionSnapshotTable(db);
|
||||
db.prepare(
|
||||
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
|
||||
).run({
|
||||
":version": 49,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
db.exec("COMMIT");
|
||||
} catch (err) {
|
||||
db.exec("ROLLBACK");
|
||||
|
|
@ -2655,6 +2956,8 @@ export function openDatabase(path) {
|
|||
const fileBacked = path !== ":memory:";
|
||||
try {
|
||||
initSchema(adapter, fileBacked);
|
||||
createDatabaseSnapshot(rawDb, path);
|
||||
performDatabaseMaintenance(rawDb, path);
|
||||
} catch (err) {
|
||||
// Corrupt freelist: DDL fails with "malformed" but VACUUM can rebuild.
|
||||
// Attempt VACUUM recovery before giving up (see #2519).
|
||||
|
|
@ -6591,6 +6894,246 @@ export function listHeadlessRuns(limit = 20) {
|
|||
.all({ ":limit": Math.max(1, Math.min(100, Number(limit) || 20)) })
|
||||
.map(headlessRunFromRow);
|
||||
}
|
||||
/**
|
||||
* Upsert a session row. Creates on first call; updates updated_at, branch,
|
||||
* repo, and summary on subsequent calls. Safe to call on every session_start
|
||||
* and again when context becomes available (e.g. after git detection).
|
||||
*
|
||||
* Purpose: establish the session entity that all turns, file-touches, and
|
||||
* refs hang off — the missing structural layer for cross-session learning.
|
||||
*
|
||||
* Consumer: session-recorder.js on session_start and session_shutdown hooks.
|
||||
*/
|
||||
export function upsertSession(entry) {
  if (!currentDb) return;
  const now = new Date().toISOString();
  // Upsert semantics: COALESCE keeps previously stored values whenever the
  // caller passes null, so partial updates never erase known context.
  // mode and cwd are fixed at creation — the update branch never touches
  // them — and summary_count counts how many times a non-null summary has
  // been written. created_at is set only on the insert path.
  currentDb
    .prepare(`INSERT INTO sessions
      (session_id, trace_id, mode, cwd, repo, branch, summary, summary_count, created_at, updated_at)
      VALUES (:session_id, :trace_id, :mode, :cwd, :repo, :branch, :summary, 0, :now, :now)
      ON CONFLICT(session_id) DO UPDATE SET
        trace_id = COALESCE(excluded.trace_id, sessions.trace_id),
        repo = COALESCE(excluded.repo, sessions.repo),
        branch = COALESCE(excluded.branch, sessions.branch),
        summary = COALESCE(excluded.summary, sessions.summary),
        summary_count = CASE WHEN excluded.summary IS NOT NULL
                        THEN sessions.summary_count + 1
                        ELSE sessions.summary_count END,
        updated_at = excluded.updated_at`)
    .run({
      ":session_id": entry.sessionId,
      ":trace_id": entry.traceId ?? null,
      ":mode": entry.mode ?? "interactive",
      ":cwd": entry.cwd ?? "",
      ":repo": entry.repo ?? null,
      ":branch": entry.branch ?? null,
      ":summary": entry.summary ?? null,
      ":now": now,
    });
}
|
||||
/**
|
||||
* Insert a turn row for a session. Returns the new turn's integer id so the
|
||||
* caller can link subsequent file-touches and refs to it.
|
||||
*
|
||||
* Purpose: record every user↔assistant exchange so turn text is searchable
|
||||
* via turns_fts and promotable into the memory pipeline.
|
||||
*
|
||||
* Consumer: session-recorder.js on before_agent_start (user_message) and
|
||||
* agent_end (assistant_response patch).
|
||||
*/
|
||||
export function insertSessionTurn(entry) {
  if (!currentDb) return null;
  currentDb
    .prepare(`INSERT INTO turns
      (session_id, turn_index, user_message, assistant_response, ts)
      VALUES (:session_id, :turn_index, :user_message, :assistant_response, :ts)
      ON CONFLICT(session_id, turn_index) DO UPDATE SET
        user_message = COALESCE(excluded.user_message, turns.user_message),
        assistant_response = COALESCE(excluded.assistant_response, turns.assistant_response)`)
    .run({
      ":session_id": entry.sessionId,
      ":turn_index": entry.turnIndex,
      ":user_message": entry.userMessage ?? null,
      ":assistant_response": entry.assistantResponse ?? null,
      ":ts": entry.ts ?? new Date().toISOString(),
    });
  // Bug fix: when the upsert takes the DO UPDATE path, lastInsertRowid
  // reports the connection's last actual INSERT (possibly a different row
  // or table), not this turn. Look the id up by the unique key so both the
  // insert and the update path return the row holding (session_id, turn_index).
  const row = currentDb
    .prepare(
      "SELECT id FROM turns WHERE session_id = :sid AND turn_index = :idx",
    )
    .get({ ":sid": entry.sessionId, ":idx": entry.turnIndex });
  return row ? Number(row["id"]) : null;
}
|
||||
/**
|
||||
* Patch the assistant_response on an existing turn row. Called from agent_end
|
||||
* after the model finishes so the full response is stored alongside the prompt.
|
||||
*
|
||||
* Purpose: complete the turn record so both halves of the exchange are
|
||||
* searchable and promotable as a unit.
|
||||
*
|
||||
* Consumer: session-recorder.js on agent_end.
|
||||
*/
|
||||
export function patchTurnResponse(sessionId, turnIndex, assistantResponse) {
  if (!currentDb) return;
  // Only fills a missing response (assistant_response IS NULL): a response
  // that was already recorded is never overwritten.
  const stmt = currentDb.prepare(`UPDATE turns SET assistant_response = :resp
      WHERE session_id = :sid AND turn_index = :idx AND assistant_response IS NULL`);
  stmt.run({
    ":resp": assistantResponse,
    ":sid": sessionId,
    ":idx": turnIndex,
  });
}
|
||||
/**
|
||||
* Record that a file path was touched in a session. UNIQUE(session_id, path)
|
||||
* means repeated touches in one session are collapsed to a single row —
|
||||
* only first_seen_at and tool_name (of the first touch) are retained.
|
||||
*
|
||||
* Purpose: enable "which files did I touch last session?" and cross-session
|
||||
* file-history queries without storing a full audit log per touch.
|
||||
*
|
||||
* Consumer: session-recorder.js on tool_call for write-class tools.
|
||||
*/
|
||||
export function recordSessionFileTouch(entry) {
  if (!currentDb) return;
  const params = {
    ":session_id": entry.sessionId,
    ":path": entry.path,
    ":tool_name": entry.toolName ?? null,
    ":turn_id": entry.turnId ?? null,
    ":first_seen_at": entry.firstSeenAt ?? new Date().toISOString(),
  };
  // INSERT OR IGNORE + UNIQUE(session_id, path): only the first touch per
  // session is stored; repeats of the same path are no-ops.
  const stmt = currentDb.prepare(`INSERT OR IGNORE INTO session_file_touches
      (session_id, path, tool_name, turn_id, first_seen_at)
      VALUES (:session_id, :path, :tool_name, :turn_id, :first_seen_at)`);
  stmt.run(params);
}
|
||||
/**
|
||||
* Record a PR / issue / commit / branch ref mentioned in a session. Idempotent
|
||||
* via UNIQUE(session_id, ref_type, ref_value).
|
||||
*
|
||||
* Purpose: make sessions queryable by the work items they touched so
|
||||
* "what session created PR #42?" is a single indexed lookup.
|
||||
*
|
||||
* Consumer: session-recorder.js when refs are detected in turn text.
|
||||
*/
|
||||
export function recordSessionRef(entry) {
  if (!currentDb) return;
  const params = {
    ":session_id": entry.sessionId,
    ":ref_type": entry.refType,
    ":ref_value": entry.refValue,
    ":turn_id": entry.turnId ?? null,
    ":created_at": entry.createdAt ?? new Date().toISOString(),
  };
  // INSERT OR IGNORE + UNIQUE(session_id, ref_type, ref_value): one row per
  // distinct ref mention per session, repeat mentions are no-ops.
  const stmt = currentDb.prepare(`INSERT OR IGNORE INTO session_refs
      (session_id, ref_type, ref_value, turn_id, created_at)
      VALUES (:session_id, :ref_type, :ref_value, :turn_id, :created_at)`);
  stmt.run(params);
}
|
||||
/**
|
||||
* Full-text search across turns via the FTS5 turns_fts virtual table.
|
||||
* Returns matching turns with their session metadata ordered by relevance.
|
||||
*
|
||||
* Purpose: power cross-session keyword recall — "what did I ask about auth?",
|
||||
* "find sessions where I worked on retry handling".
|
||||
*
|
||||
* Consumer: sf memory search, context-injection, and /session search command.
|
||||
*/
|
||||
export function searchSessionTurns(query, limit = 20) {
  if (!currentDb) return [];
  // Robustness fix: coerce before clamping. The previous
  // Math.max(1, Math.min(100, limit)) produced NaN for a non-numeric limit;
  // `Number(limit) || 20` maps NaN/0/undefined to the default, matching the
  // clamp used by listHeadlessRuns().
  const cappedLimit = Math.max(1, Math.min(100, Number(limit) || 20));
  return currentDb
    .prepare(`SELECT t.id, t.session_id, t.turn_index, t.ts,
        t.user_message, t.assistant_response,
        s.mode, s.cwd, s.repo, s.branch
      FROM turns_fts
      JOIN turns t ON turns_fts.rowid = t.id
      JOIN sessions s ON t.session_id = s.session_id
      WHERE turns_fts MATCH :query
      ORDER BY rank
      LIMIT :limit`)
    .all({ ":query": query, ":limit": cappedLimit });
}
|
||||
/**
|
||||
* List recent sessions with their turn count and last-touched file count.
|
||||
* Useful for /session list and for memory-pipeline ingestion sweeps.
|
||||
*
|
||||
* Consumer: trajectory-command, memory-ingest, doctor checks.
|
||||
*/
|
||||
export function listRecentSessions(limit = 20) {
  if (!currentDb) return [];
  // Robustness fix: coerce before clamping so a non-numeric limit falls back
  // to the default instead of producing NaN (consistent with listHeadlessRuns).
  const cappedLimit = Math.max(1, Math.min(100, Number(limit) || 20));
  return currentDb
    .prepare(`SELECT s.session_id, s.mode, s.cwd, s.repo, s.branch,
        s.summary, s.created_at, s.updated_at,
        COUNT(DISTINCT t.id) AS turn_count,
        COUNT(DISTINCT f.id) AS file_count
      FROM sessions s
      LEFT JOIN turns t ON t.session_id = s.session_id
      LEFT JOIN session_file_touches f ON f.session_id = s.session_id
      GROUP BY s.session_id
      ORDER BY s.updated_at DESC
      LIMIT :limit`)
    .all({ ":limit": cappedLimit });
}
|
||||
/**
|
||||
* Record a snapshot checkpoint before an irreversible operation. Idempotent
|
||||
* within a session: the snapshot_index is auto-incremented from the current
|
||||
* max so callers can create multiple checkpoints per session without
|
||||
* coordination.
|
||||
*
|
||||
* Purpose: give session_snapshots a first-class row so recovery paths and
|
||||
* irreversible-ops gates can reference the stash ref and label without
|
||||
* parsing free-text.
|
||||
*
|
||||
* Consumer: irreversible-ops safety gate (session_before_compact, future
|
||||
* verify steps that call git stash before destructive actions).
|
||||
*
|
||||
* @param {{ sessionId: string, gitStashRef?: string|null, label?: string|null, ts?: string }} args
|
||||
* @returns {number} The row id of the inserted snapshot (or 0 on failure).
|
||||
*/
|
||||
export function insertSessionSnapshot(args) {
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  // Next snapshot_index = one past the current max for this session;
  // COALESCE(..., -1) + 1 yields 0 for the session's first snapshot.
  const indexRow = currentDb
    .prepare(
      "SELECT COALESCE(MAX(snapshot_index), -1) + 1 AS nxt FROM session_snapshots WHERE session_id = :sid",
    )
    .get({ ":sid": args.sessionId });
  const snapshotIndex = indexRow ? Number(indexRow["nxt"]) : 0;
  currentDb
    .prepare(`INSERT INTO session_snapshots
      (session_id, snapshot_index, git_stash_ref, label, ts)
      VALUES (:sid, :idx, :ref, :label, :ts)`)
    .run({
      ":sid": args.sessionId,
      ":idx": snapshotIndex,
      ":ref": args.gitStashRef ?? null,
      ":label": args.label ?? null,
      ":ts": args.ts ?? new Date().toISOString(),
    });
  // Re-select by the unique (session_id, snapshot_index) key to return the
  // freshly inserted row id; 0 signals the row could not be found.
  const inserted = currentDb
    .prepare(
      "SELECT id FROM session_snapshots WHERE session_id = :sid AND snapshot_index = :idx",
    )
    .get({ ":sid": args.sessionId, ":idx": snapshotIndex });
  return inserted ? Number(inserted["id"]) : 0;
}
|
||||
/**
|
||||
* List all snapshots for a session, ordered by snapshot_index ascending.
|
||||
*
|
||||
* Purpose: let recovery tooling enumerate available restore points for a
|
||||
* session and present them to the operator before a rollback.
|
||||
*
|
||||
* Consumer: future /session snapshots command and irreversible-ops skill.
|
||||
*
|
||||
* @param {string} sessionId
|
||||
* @returns {Array<{id:number, session_id:string, snapshot_index:number, git_stash_ref:string|null, label:string|null, ts:string}>}
|
||||
*/
|
||||
export function listSessionSnapshots(sessionId) {
  if (!currentDb) return [];
  const stmt = currentDb.prepare(
    "SELECT * FROM session_snapshots WHERE session_id = :sid ORDER BY snapshot_index ASC",
  );
  return stmt.all({ ":sid": sessionId });
}
|
||||
|
||||
/**
|
||||
* INSERT OR REPLACE a quality_gates row. Used by milestone-validation-gates.ts
|
||||
* to persist milestone-level (MV*) gate outcomes after validate-milestone runs.
|
||||
|
|
|
|||
|
|
@ -31,6 +31,9 @@ function normalize(name) {
|
|||
const UNIT_TYPE_SKILL_MANIFEST = {
|
||||
// Milestone-level planning / meta flows — predictable skill sets.
|
||||
"research-milestone": [
|
||||
"autoresearch",
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"write-docs",
|
||||
"write-milestone-brief",
|
||||
"decompose-into-slices",
|
||||
|
|
@ -40,6 +43,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
"observability",
|
||||
],
|
||||
"plan-milestone": [
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"write-milestone-brief",
|
||||
"decompose-into-slices",
|
||||
"design-an-interface",
|
||||
|
|
@ -50,6 +55,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
"verify-before-complete",
|
||||
],
|
||||
"roadmap-meeting": [
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"write-milestone-brief",
|
||||
"decompose-into-slices",
|
||||
"design-an-interface",
|
||||
|
|
@ -60,6 +67,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
"verify-before-complete",
|
||||
],
|
||||
"complete-milestone": [
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"verify-before-complete",
|
||||
"write-docs",
|
||||
"handoff",
|
||||
|
|
@ -78,6 +87,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
"observability",
|
||||
],
|
||||
"reassess-roadmap": [
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"decompose-into-slices",
|
||||
"grill-me",
|
||||
"write-milestone-brief",
|
||||
|
|
@ -86,6 +97,9 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
],
|
||||
// Slice-level research / planning.
|
||||
"research-slice": [
|
||||
"autoresearch",
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"write-docs",
|
||||
"decompose-into-slices",
|
||||
"design-an-interface",
|
||||
|
|
@ -94,6 +108,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
"observability",
|
||||
],
|
||||
"plan-slice": [
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"decompose-into-slices",
|
||||
"design-an-interface",
|
||||
"grill-me",
|
||||
|
|
@ -103,6 +119,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
"verify-before-complete",
|
||||
],
|
||||
"refine-slice": [
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"decompose-into-slices",
|
||||
"design-an-interface",
|
||||
"grill-me",
|
||||
|
|
@ -112,6 +130,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
|
|||
"verify-before-complete",
|
||||
],
|
||||
"replan-slice": [
|
||||
"human-writing",
|
||||
"sf-wiki",
|
||||
"decompose-into-slices",
|
||||
"grill-me",
|
||||
"design-an-interface",
|
||||
|
|
|
|||
|
|
@ -7,10 +7,18 @@
|
|||
* Consumer: skill loader, auto-skill creation, and model context assembly.
|
||||
*/
|
||||
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const SKILL_FILENAME = "SKILL.md";
|
||||
const USER_SKILL_DIR = join(process.env.HOME ?? "", ".sf", "skills");
|
||||
const BUNDLED_SKILL_DIR = join(
|
||||
dirname(fileURLToPath(import.meta.url)),
|
||||
"..",
|
||||
"..",
|
||||
"..",
|
||||
"skills",
|
||||
);
|
||||
|
||||
/**
|
||||
* Find all skill directories under a base path.
|
||||
|
|
@ -35,9 +43,17 @@ export function discoverSkillDirs(basePath) {
|
|||
/**
|
||||
* Discover skills from all sources: project, user, and built-in.
|
||||
*/
|
||||
export function discoverAllSkills(projectPath) {
|
||||
export function discoverAllSkills(projectPath, options = {}) {
|
||||
const sources = [];
|
||||
|
||||
// Bundled SF skills
|
||||
if (options.includeBundled && existsSync(BUNDLED_SKILL_DIR)) {
|
||||
const bundledSkills = discoverSkillDirsInRoot(BUNDLED_SKILL_DIR);
|
||||
for (const s of bundledSkills) {
|
||||
sources.push({ ...s, source: "bundled" });
|
||||
}
|
||||
}
|
||||
|
||||
// Project skills
|
||||
if (projectPath) {
|
||||
const projectSkills = discoverSkillDirs(projectPath);
|
||||
|
|
@ -59,6 +75,22 @@ export function discoverAllSkills(projectPath) {
|
|||
return sources;
|
||||
}
|
||||
|
||||
/**
 * Enumerate immediate subdirectories of `skillRoot` that contain a SKILL.md.
 *
 * Purpose: shared scan used for the bundled skill root; returns one
 * { name, path, skillFile } record per skill directory found.
 *
 * @param {string} skillRoot  Directory whose children are candidate skills.
 * @returns {Array<{name: string, path: string, skillFile: string}>}
 */
function discoverSkillDirsInRoot(skillRoot) {
  if (!existsSync(skillRoot)) return [];

  const dirs = [];
  for (const entry of readdirSync(skillRoot)) {
    const full = join(skillRoot, entry);
    // Robustness fix: statSync throws on broken symlinks or entries removed
    // mid-scan; one bad entry must not abort discovery of every other skill.
    let isDir = false;
    try {
      isDir = statSync(full).isDirectory();
    } catch {
      continue;
    }
    if (isDir) {
      const skillFile = join(full, SKILL_FILENAME);
      if (existsSync(skillFile)) {
        dirs.push({ name: entry, path: full, skillFile });
      }
    }
  }
  return dirs;
}
|
||||
|
||||
/**
|
||||
* Read the raw content of a skill file.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@ import {
|
|||
*
|
||||
* Returns array of skill records with validation errors attached.
|
||||
*/
|
||||
export function loadSkills(projectPath) {
|
||||
const discovered = discoverAllSkills(projectPath);
|
||||
export function loadSkills(projectPath, options = {}) {
|
||||
const discovered = discoverAllSkills(projectPath, options);
|
||||
const skills = [];
|
||||
|
||||
for (const { name, path, source } of discovered) {
|
||||
|
|
@ -47,7 +47,10 @@ export function loadSkills(projectPath) {
|
|||
continue;
|
||||
}
|
||||
|
||||
const validation = validateSkillFrontmatter(parsed.frontmatter);
|
||||
const validation =
|
||||
source === "bundled"
|
||||
? validateBundledSkillFrontmatter(parsed.frontmatter)
|
||||
: validateSkillFrontmatter(parsed.frontmatter);
|
||||
if (!validation.valid) {
|
||||
skills.push({
|
||||
name,
|
||||
|
|
@ -61,6 +64,15 @@ export function loadSkills(projectPath) {
|
|||
}
|
||||
|
||||
const record = buildSkillRecord(path, parsed.frontmatter, parsed.body);
|
||||
if (
|
||||
source === "bundled" &&
|
||||
parsed.frontmatter["user-invocable"] === undefined
|
||||
) {
|
||||
record.userInvocable = !isWorkflowOnlyBundledSkill(
|
||||
parsed.frontmatter,
|
||||
parsed.body,
|
||||
);
|
||||
}
|
||||
skills.push({
|
||||
...record,
|
||||
source,
|
||||
|
|
@ -72,6 +84,28 @@ export function loadSkills(projectPath) {
|
|||
return skills;
|
||||
}
|
||||
|
||||
/**
 * Minimal frontmatter validation for bundled skills: only `name` and
 * `description` are required to be non-empty strings (bundled skills skip
 * the stricter user-skill schema). Returns { valid, errors }.
 */
function validateBundledSkillFrontmatter(frontmatter) {
  const errors = [];
  const requireString = (field) => {
    const value = frontmatter[field];
    if (!value || typeof value !== "string") {
      errors.push(`Missing or invalid '${field}' field`);
    }
  };
  requireString("name");
  requireString("description");
  return { valid: errors.length === 0, errors };
}
|
||||
|
||||
/**
 * Heuristic: a bundled skill is workflow-only (not user-invocable) when its
 * description or body contains one of the known workflow marker phrases,
 * compared case-insensitively.
 */
function isWorkflowOnlyBundledSkill(frontmatter, body) {
  const haystack = [frontmatter.description ?? "", body ?? ""]
    .join("\n")
    .toLowerCase();
  const markers = ["use inside autonomous workflow", "this is a workflow skill"];
  return markers.some((marker) => haystack.includes(marker));
}
|
||||
|
||||
/**
|
||||
* Get skills that are safe for the current permission profile.
|
||||
*/
|
||||
|
|
@ -88,6 +122,20 @@ export function getPermittedSkills(skills, activeProfile) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get SF runtime skills that should appear in the user-facing /skills catalog.
|
||||
*
|
||||
* Purpose: keep repo/operator and workflow-only skills available to routing
|
||||
* without advertising them as SF runtime skills.
|
||||
*
|
||||
* Consumer: /skills list mode in the SF command surface.
|
||||
*/
|
||||
export function getUserInvocableSkills(skills) {
  // Catalog membership: bundled source, passed validation, and flagged as
  // user-invocable (repo/operator and workflow-only skills are excluded).
  const isCatalogSkill = (skill) =>
    skill.source === "bundled" && skill.valid && skill.userInvocable;
  return skills.filter(isCatalogSkill);
}
|
||||
|
||||
/**
|
||||
* Get skills that can be invoked by the model for a given work mode.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
// SF Extension — State Derivation
|
||||
// DB-primary state derivation with filesystem fallback for unmigrated projects.
|
||||
// DB-primary state derivation with explicit recovery guidance when DB-backed
|
||||
// projects cannot be opened. Legacy filesystem parsing remains available only
|
||||
// for projects that have not yet attempted DB bootstrap in this process.
|
||||
// Pure TypeScript, zero Pi dependencies.
|
||||
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { join, resolve } from "node:path";
|
||||
|
|
@ -142,6 +144,44 @@ const CACHE_TTL_MS = 5000;
|
|||
let _stateCache = null;
|
||||
// ── Telemetry counters for derive-path observability ────────────────────────
|
||||
const _telemetry = { dbDeriveCount: 0, markdownDeriveCount: 0 };
|
||||
const EMPTY_REQUIREMENT_COUNTS = Object.freeze({
|
||||
active: 0,
|
||||
validated: 0,
|
||||
deferred: 0,
|
||||
outOfScope: 0,
|
||||
blocked: 0,
|
||||
total: 0,
|
||||
});
|
||||
|
||||
/**
 * Whether the project still carries legacy markdown-era SF artifacts:
 * either at least one milestone directory, or any of the root planning
 * files (PROJECT, REQUIREMENTS, DECISIONS, KNOWLEDGE, RUNTIME, STATE).
 *
 * Used to decide whether a DB-unavailable project must be routed to
 * recovery/migration rather than treated as a fresh project.
 *
 * @param {string} basePath - Project root directory.
 * @returns {boolean}
 */
function hasLegacyRuntimeArtifacts(basePath) {
  if (findMilestoneIds(basePath).length > 0) {
    return true;
  }
  const rootFileKinds = [
    "PROJECT",
    "REQUIREMENTS",
    "DECISIONS",
    "KNOWLEDGE",
    "RUNTIME",
    "STATE",
  ];
  return rootFileKinds.some(
    (kind) => resolveSfRootFile(basePath, kind) !== null,
  );
}
|
||||
|
||||
/**
 * Build the blocked "DB recovery required" state.
 *
 * Returned when DB bootstrap has failed for a project that still has
 * legacy SF artifacts on disk: instead of silently falling back to
 * markdown authority, the runtime reports a blocked phase and points
 * the operator at `sf recover` / `sf migrate`.
 *
 * @returns {object} A fully-populated, empty derived state with
 *   phase "blocked" and recovery guidance in `blockers`/`nextAction`.
 */
function buildDbRecoveryRequiredState() {
  const blockerMessage =
    "DB-backed SF state is unavailable. Runtime does not fall back to markdown authority after DB bootstrap fails.";
  const recoveryHint =
    "Run `sf recover` to rebuild DB state from disk, or `sf migrate` for a legacy markdown-only project.";

  return {
    activeMilestone: null,
    activeSlice: null,
    activeTask: null,
    phase: "blocked",
    recentDecisions: [],
    blockers: [blockerMessage],
    nextAction: recoveryHint,
    registry: [],
    requirements: EMPTY_REQUIREMENT_COUNTS,
    progress: { milestones: { done: 0, total: 0 } },
  };
}
|
||||
/**
|
||||
* Invalidate the deriveState() cache. Call this whenever planning files on disk
|
||||
* may have changed (unit completion, merges, file writes).
|
||||
|
|
@ -241,10 +281,18 @@ export async function deriveState(basePath) {
|
|||
// the DB simply hasn't been opened yet (e.g. during before_agent_start
|
||||
// context injection which runs before any tool invocation opens the DB).
|
||||
if (wasDbOpenAttempted()) {
|
||||
logWarning(
|
||||
"state",
|
||||
"DB unavailable — using filesystem state derivation (degraded mode)",
|
||||
);
|
||||
if (hasLegacyRuntimeArtifacts(basePath)) {
|
||||
logWarning(
|
||||
"state",
|
||||
"DB unavailable for a project with legacy SF artifacts — refusing runtime markdown fallback; run sf recover or sf migrate",
|
||||
);
|
||||
result = buildDbRecoveryRequiredState();
|
||||
stopTimer({ phase: result.phase, milestone: result.activeMilestone?.id });
|
||||
debugCount("deriveStateCalls");
|
||||
_stateCache = { basePath, result, timestamp: Date.now() };
|
||||
return result;
|
||||
}
|
||||
logWarning("state", "DB unavailable — using filesystem state derivation");
|
||||
}
|
||||
result = await _deriveStateImpl(basePath);
|
||||
_telemetry.markdownDeriveCount++;
|
||||
|
|
|
|||
137
src/resources/extensions/sf/steerable-autonomous-extension.js
Normal file
137
src/resources/extensions/sf/steerable-autonomous-extension.js
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
/**
|
||||
* Steerable Autonomous Extension - Copilot Auto-style controls
|
||||
*
|
||||
* Provides Shift+Tab interface for steering and asking questions
|
||||
* during autonomous execution, similar to Copilot Auto.
|
||||
* Also integrates Ctrl+Y for YOLO mode (bypass git prompts).
|
||||
*/
|
||||
|
||||
import {
|
||||
handleSteerableModeKey,
|
||||
SteerableAutonomousPanel,
|
||||
} from "./steerable-autonomous-panel.js";
|
||||
|
||||
/**
 * Steerable Autonomous Extension — Copilot Auto-style controls.
 *
 * Registers the Shift+Tab / "/steer" panel toggle, the Ctrl+Y YOLO-mode
 * toggle, and an "/autonomous-status" command, and tracks whether
 * autonomous mode is active via autonomous_start/autonomous_stop events.
 *
 * @param {object} api - Extension host API (registerShortcut,
 *   registerCommand, on).
 */
export default function steerableAutonomousExtension(api) {
  let panel = null;
  let isAutonomousActive = false;

  // Close and forget the panel if one is open (previously duplicated at
  // three teardown sites).
  const closePanel = () => {
    if (panel) {
      panel.hide();
      panel = null;
    }
  };

  // Shared toggle used by both the Shift+Tab shortcut and the /steer
  // command (previously duplicated verbatim in both handlers).
  const togglePanel = async (ctx) => {
    if (!isAutonomousActive) {
      ctx.ui.notify(
        "Autonomous mode not active - use /autonomous to start",
        "info",
      );
      return;
    }
    if (panel) {
      closePanel();
    } else {
      panel = new SteerableAutonomousPanel(ctx);
      await panel.show();
    }
  };

  // Reset panel/mode state at the start of every session.
  api.on("session_start", async (_, ctx) => {
    isAutonomousActive = false;
    closePanel();
  });

  // Shift+Tab toggles the steering panel while autonomous mode runs.
  api.registerShortcut("shift+tab", {
    description: "Open/close steerable autonomous panel",
    handler: async (event, ctx) => togglePanel(ctx),
  });

  // Ctrl+Y toggles YOLO mode (bypass safe-git prompts).
  api.registerShortcut("ctrl+y", {
    description: "Toggle YOLO mode (bypass git prompts)",
    handler: async (event, ctx) => {
      if (ctx.settingsManager && ctx.settingsManager.toggleYOLO) {
        const enabled = ctx.settingsManager.toggleYOLO();
        ctx.ui.notify(
          `🚀 YOLO mode ${enabled ? "ON" : "OFF"} - safe-git prompts ${enabled ? "disabled" : "enabled"}`,
          enabled ? "success" : "info",
        );
      } else {
        // No settings manager in this context: session-local notice only.
        ctx.ui.notify(
          "🚀 YOLO mode - safe-git prompts disabled for this session",
          "success",
        );
      }
    },
  });

  // /steer is the slash-command equivalent of Shift+Tab.
  api.registerCommand("steer", {
    description: "Open steerable autonomous panel (Shift+Tab)",
    handler: async (_, ctx) => togglePanel(ctx),
  });

  // Track the autonomous-mode lifecycle.
  api.on("autonomous_start", async (_, ctx) => {
    isAutonomousActive = true;
    ctx.ui.notify("🤖 Autonomous mode active - Shift+Tab to steer", "info");
  });

  api.on("autonomous_stop", async (_, ctx) => {
    isAutonomousActive = false;
    closePanel();
    ctx.ui.notify("⏹️ Autonomous mode stopped", "info");
  });

  // Show current autonomous status.
  api.registerCommand("autonomous-status", {
    description: "Show current autonomous mode status and retry attempts",
    handler: async (_, ctx) => {
      if (!isAutonomousActive) {
        ctx.ui.notify("Autonomous mode not active", "info");
        return;
      }

      // Placeholder data — would come from the actual autonomous system.
      const status = {
        currentPhase: "research",
        attempts: 3,
        status: "working on current task",
        retryHistory: ["approach A", "approach B", "approach C"],
      };

      const lines = [
        "🤖 Autonomous Mode Status",
        "",
        `Current Phase: ${status.currentPhase}`,
        `Status: ${status.status}`,
        `Attempts: ${status.attempts}`,
        "",
        "Recent Attempts:",
        ...status.retryHistory.map((attempt, i) => `  ${i + 1}. ${attempt}`),
        "",
        "Use Shift+Tab or /steer to control",
      ];

      ctx.ui.notify(lines.join("\n"), "info");
    },
  });
}
|
||||
360
src/resources/extensions/sf/steerable-autonomous-panel.js
Normal file
360
src/resources/extensions/sf/steerable-autonomous-panel.js
Normal file
|
|
@ -0,0 +1,360 @@
|
|||
/**
|
||||
* Steerable Autonomous Mode - Interactive Control Panel
|
||||
*
|
||||
* Provides Shift+Tab interface for steering and asking questions
|
||||
* during autonomous execution, similar to Copilot Auto.
|
||||
* Also integrates Ctrl+Y for YOLO mode (bypass git prompts).
|
||||
*/
|
||||
|
||||
import { createInterface } from "node:readline";
|
||||
import { getEditorKeybindings } from "@singularity-forge/pi-tui";
|
||||
|
||||
// ─── Constants ──────────────────────────────────────────────────────────────
// Total box width in columns used by renderBox.
const PANEL_WIDTH = 60;
// Number of rows cleared when the panel is hidden (see hide()).
const PANEL_HEIGHT = 12;

// Panel menu definition: each category renders as a bold header plus one
// "KEY. label" row per item. `key` is matched against the readline key name
// in handleKeyPress, and `action` indexes into ACTION_HANDLERS.
const CONTROL_CATEGORIES = [
  {
    name: "🎯 Steering",
    items: [
      { key: "1", label: "Focus on research", action: "focus_research" },
      { key: "2", label: "Focus on planning", action: "focus_plan" },
      { key: "3", label: "Focus on implementation", action: "focus_build" },
      { key: "4", label: "Speed up execution", action: "speed_up" },
      { key: "5", label: "Slow down execution", action: "slow_down" },
    ],
  },
  {
    name: "❓ Ask Questions",
    items: [
      { key: "q", label: "What are you working on?", action: "ask_status" },
      { key: "w", label: "Why this approach?", action: "ask_reasoning" },
      { key: "e", label: "What's next?", action: "ask_next" },
      // NOTE(review): "r" is reused by "Reassess" in Retry Status below;
      // handleKeyPress takes the first match, making that entry unreachable
      // from the keyboard — confirm and de-duplicate.
      { key: "r", label: "Are you stuck?", action: "ask_stuck" },
      { key: "t", label: "Explain your plan", action: "ask_plan" },
    ],
  },
  {
    name: "🔄 Retry Status",
    items: [
      {
        key: "a",
        label: "What attempts have been tried?",
        action: "ask_attempts",
      },
      {
        key: "z",
        label: "Why give up? What blockers?",
        action: "ask_blockers",
      },
      // NOTE(review): duplicate key "r" — see "Are you stuck?" above.
      { key: "r", label: "Reassess and try new approach", action: "reassess" },
    ],
  },
  {
    name: "⚡ Quick Controls",
    items: [
      { key: "p", label: "Pause autonomous", action: "pause" },
      // NOTE(review): ACTION_HANDLERS defines no "stop" handler, so
      // dispatching this item would throw — confirm and add a handler.
      { key: "s", label: "Stop execution", action: "stop" },
      { key: "y", label: "YOLO mode (Ctrl+Y)", action: "yolo" },
      { key: "h", label: "Help/commands", action: "help" },
      // "esc" is also intercepted earlier by name ("escape") in
      // handleKeyPress, before action lookup.
      { key: "esc", label: "Close panel", action: "close" },
    ],
  },
];
|
||||
|
||||
// ─── UI Rendering ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Frame `lines` in a box-drawing border PANEL_WIDTH columns wide.
 *
 * The title is embedded into the top border; each content line is padded
 * (but not truncated) to the interior width.
 *
 * NOTE(review): padding counts UTF-16 code units, so emoji or ANSI escape
 * sequences inside a line can skew the visual alignment — confirm this is
 * acceptable for the TUI.
 *
 * @param {string[]} lines - Interior lines, top to bottom.
 * @param {string} [title] - Text woven into the top border.
 * @returns {string} Multi-line box ending with a newline.
 */
function renderBox(lines, title = "") {
  const width = PANEL_WIDTH;
  const bar = "─".repeat(width - 2);

  const top = `┌─${title} ${bar.slice(title.length + 1)}─┐\n`;
  const bottom = `└─${bar}─┘\n`;
  const body = lines
    .map((line) => `│ ${line.padEnd(width - 2, " ")} │\n`)
    .join("");

  return top + body + bottom;
}
|
||||
|
||||
/**
 * Render one control category as display lines: a bold (ANSI) header
 * followed by one "  KEY. label" row per item. The "esc" key is shown
 * as "Esc"; all other keys are uppercased.
 *
 * @param {{ name: string, items: Array<{key: string, label: string}> }} category
 * @returns {string[]} Lines ready for renderBox.
 */
function renderCategory(category) {
  const header = `\x1b[1m${category.name}\x1b[0m`;
  const rows = category.items.map((item) => {
    const keyDisplay =
      item.key === "esc" ? "Esc" : item.key.toUpperCase();
    return `  ${keyDisplay}. ${item.label}`;
  });
  return [header, ...rows];
}
|
||||
|
||||
/**
 * Compose the full steering panel: an optional status line, every control
 * category (with a blank line between categories), and the key-hint
 * footer, all framed by renderBox.
 *
 * @param {string} [currentStatus] - Optional status shown at the top.
 * @returns {string} Rendered panel text.
 */
function renderPanel(currentStatus = "") {
  const content = [];

  // Optional status header followed by a spacer line.
  if (currentStatus) {
    content.push(`🤖 ${currentStatus}`, "");
  }

  // All categories, separated (not terminated) by blank lines.
  CONTROL_CATEGORIES.forEach((category, index) => {
    content.push(...renderCategory(category));
    if (index < CONTROL_CATEGORIES.length - 1) {
      content.push("");
    }
  });

  // Footer with the key hints.
  content.push(
    "",
    "\x1b[90mShift+Tab or / to open/close • Ctrl+Y for YOLO\x1b[0m",
  );

  return renderBox(content, "🎛️ Steerable Autonomous Mode");
}
|
||||
|
||||
// ─── Action Handlers ────────────────────────────────────────────────────────────
|
||||
|
||||
// Panel action dispatch table, keyed by the `action` strings declared in
// CONTROL_CATEGORIES. Each handler receives the extension context and
// reports via ctx.ui.notify. Most handlers are placeholders: they emit a
// canned message and carry a "Would ..." comment describing the intended
// integration.
// NOTE(review): CONTROL_CATEGORIES also declares action "stop", which has
// no entry here — dispatching it would throw; confirm and add a handler.
const ACTION_HANDLERS = {
  focus_research: async (ctx) => {
    ctx.ui.notify("🎯 Focusing on research phase", "info");
    // Would set autonomous mode to prioritize research
  },

  focus_plan: async (ctx) => {
    ctx.ui.notify("🎯 Focusing on planning phase", "info");
    // Would set autonomous mode to prioritize planning
  },

  focus_build: async (ctx) => {
    ctx.ui.notify("🎯 Focusing on implementation phase", "info");
    // Would set autonomous mode to prioritize building
  },

  speed_up: async (ctx) => {
    ctx.ui.notify("⚡ Execution speed increased", "info");
    // Would adjust autonomous execution speed
  },

  slow_down: async (ctx) => {
    ctx.ui.notify("🐌 Execution speed decreased", "info");
    // Would adjust autonomous execution speed
  },

  ask_status: async (ctx) => {
    ctx.ui.notify("🤖 I'm currently working on [current task]", "info");
    // Would provide current status via AI response
  },

  ask_reasoning: async (ctx) => {
    ctx.ui.notify("🤖 I chose this approach because...", "info");
    // Would provide reasoning via AI response
  },

  ask_next: async (ctx) => {
    ctx.ui.notify("🤖 Next I'll [next step]", "info");
    // Would provide next steps via AI response
  },

  ask_stuck: async (ctx) => {
    ctx.ui.notify("🤖 I'm not stuck, but here's my status...", "info");
    // Would provide stuck status via AI response
  },

  ask_plan: async (ctx) => {
    ctx.ui.notify("🤖 My plan is: [detailed plan]", "info");
    // Would provide plan explanation via AI response
  },

  pause: async (ctx) => {
    ctx.ui.notify("⏸️ Autonomous mode paused", "info");
    // Would pause autonomous execution
  },

  yolo: async (ctx) => {
    // Toggle YOLO mode - integrate with existing SafeGit system
    if (ctx.settingsManager && ctx.settingsManager.toggleYOLO) {
      const enabled = ctx.settingsManager.toggleYOLO();
      ctx.ui.notify(
        `🚀 YOLO mode ${enabled ? "ON" : "OFF"} - safe-git prompts ${enabled ? "disabled" : "enabled"}`,
        enabled ? "success" : "info",
      );
    } else {
      ctx.ui.notify(
        "🚀 YOLO mode - safe-git prompts disabled for this session",
        "success",
      );
    }
  },

  help: async (ctx) => {
    // Show help about the steerable mode
    const helpText = renderPanel("Available controls shown above");
    ctx.ui.notify("Steerable Autonomous Mode Help\n\n" + helpText, "info");
  },

  ask_attempts: async (ctx) => {
    ctx.ui.notify(
      "🤖 I've tried multiple approaches: [list of attempts]",
      "info",
    );
    // Would provide list of attempted approaches
  },

  ask_blockers: async (ctx) => {
    ctx.ui.notify("🤖 Main blockers: [list of current blockers]", "info");
    // Would explain why it's giving up
  },

  reassess: async (ctx) => {
    ctx.ui.notify("🔄 Reassessing - trying new approaches", "info");
    // Would trigger immediate reassessment
  },

  close: async (ctx) => {
    // Just hide the panel
    // NOTE(review): intentionally a no-op, and handleKeyPress never calls
    // hide() for the "close" action; Escape is handled by key name before
    // action lookup, so this handler appears unreachable — confirm.
  },
};
|
||||
|
||||
// ─── Panel Controller ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Interactive steering panel rendered over the terminal while autonomous
 * mode runs.
 *
 * show()/hide() manage cursor visibility, the readline interface, and the
 * stdin keypress listener; handleKeyPress dispatches panel actions from
 * CONTROL_CATEGORIES through ACTION_HANDLERS.
 */
export class SteerableAutonomousPanel {
  constructor(ctx) {
    this.ctx = ctx;
    this.isVisible = false;
    this.rl = null;
    // Bound listener reference so hide() can detach exactly what show()
    // attached (rl.close() does not remove listeners added to rl.input).
    this._onKeypress = null;
  }

  async show() {
    if (this.isVisible) return;

    this.isVisible = true;
    this.rl = createInterface({
      input: process.stdin,
      output: process.stdout,
      terminal: true,
    });

    // Hide cursor while panel is open
    process.stdout.write("\x1b[?25l");

    // Render panel
    this.render();

    // Set up key listener.
    // NOTE(review): keypress events on stdin normally require
    // readline.emitKeypressEvents(process.stdin) — confirm the host TUI
    // enables them before relying on this listener.
    this._onKeypress = (str, key) => {
      this.handleKeyPress(key);
    };
    this.rl.input.on("keypress", this._onKeypress);
  }

  hide() {
    if (!this.isVisible) return;

    this.isVisible = false;

    // Restore cursor
    process.stdout.write("\x1b[?25h");

    // Clear the panel area
    process.stdout.write("\x1b[" + PANEL_HEIGHT + "F"); // Move cursor up
    process.stdout.write("\x1b[0J"); // Clear from cursor down

    if (this.rl) {
      // Fix: detach our stdin listener before closing, otherwise it
      // survives rl.close() and fires again when a new panel opens.
      if (this._onKeypress) {
        this.rl.input.removeListener("keypress", this._onKeypress);
        this._onKeypress = null;
      }
      this.rl.close();
      this.rl = null;
    }
  }

  async render() {
    if (!this.isVisible) return;

    // Get current autonomous status (would come from actual system)
    const currentStatus = "Working on current milestone...";

    const panel = renderPanel(currentStatus);

    // Draw at the top of the screen, then restore the cursor.
    process.stdout.write("\x1b[s"); // Save current position
    process.stdout.write("\x1b[H"); // Move to top-left
    process.stdout.write(panel);
    process.stdout.write("\x1b[u"); // Restore saved position
  }

  async handleKeyPress(key) {
    // Fix: readline may emit keypress with an undefined key object for
    // some input; bail out instead of throwing on key.name below.
    if (!this.isVisible || !key) return;

    // Escape always closes the panel, before any action lookup.
    if (key.name === "escape") {
      this.hide();
      return;
    }

    // Normalize the pressed key to a lookup token.
    let actionKey = key.name || key.sequence?.toLowerCase() || "";
    if (actionKey.length === 1) {
      actionKey = actionKey.toLowerCase();
    }

    // First matching item across categories wins.
    let action = null;
    for (const category of CONTROL_CATEGORIES) {
      const item = category.items.find((item) => item.key === actionKey);
      if (item) {
        action = item;
        break;
      }
    }

    if (!action) return;

    // Fix: CONTROL_CATEGORIES declares actions (e.g. "stop") with no
    // entry in ACTION_HANDLERS; guard so an unmapped action cannot throw.
    const handler = ACTION_HANDLERS[action.action];
    if (typeof handler === "function") {
      await handler(this.ctx);
    }

    if (action.action === "close") {
      // Fix: the "close" item previously left the panel visible because
      // its handler is a no-op and hide() was never called here.
      this.hide();
    } else {
      this.render();
    }
  }
}
|
||||
|
||||
// ─── Integration Hook ──────────────────────────────────────────────────────────

// Module-level singleton: at most one panel is active at a time.
let activePanel = null;

/**
 * Replace any currently-open panel with a fresh one bound to `ctx`.
 * @param {object} ctx - Extension context passed to the panel.
 */
export async function showSteerablePanel(ctx) {
  if (activePanel !== null) {
    activePanel.hide();
  }
  const next = new SteerableAutonomousPanel(ctx);
  activePanel = next;
  await next.show();
}

/** Hide and forget the active panel, if any. */
export async function hideSteerablePanel() {
  const current = activePanel;
  if (!current) return;
  current.hide();
  activePanel = null;
}

// ─── Keyboard Integration (would integrate with TUI's key handler) ──────────

/**
 * Whether a key event is the panel toggle (Shift+Tab).
 * @param {{ shift?: boolean, name?: string }} key
 * @returns {boolean} True when the event was handled.
 */
export function handleSteerableModeKey(key) {
  return Boolean(key.shift && key.name === "tab");
}

export default {
  show: showSteerablePanel,
  hide: hideSteerablePanel,
  handleKey: handleSteerableModeKey,
};
|
||||
|
|
@ -4,7 +4,16 @@ mode:
|
|||
always_use_skills: []
|
||||
prefer_skills: []
|
||||
avoid_skills: []
|
||||
skill_rules: []
|
||||
skill_rules:
|
||||
- when: writing or editing docs, plans, records, handoffs, PR text, or other human-readable prose
|
||||
use:
|
||||
- human-writing
|
||||
- when: building repo orientation, architecture maps, generated wiki, subsystem inventory, or durable codebase context
|
||||
use:
|
||||
- sf-wiki
|
||||
- when: optimizing a measurable metric through experiments, benchmarks, performance work, bundle size, test speed, or model quality
|
||||
use:
|
||||
- autoresearch
|
||||
custom_instructions: []
|
||||
models: {}
|
||||
skill_discovery:
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import {
|
|||
closeDatabase,
|
||||
getAllMilestones,
|
||||
getSliceTasks,
|
||||
isDbAvailable,
|
||||
insertMilestone,
|
||||
insertSlice,
|
||||
insertTask,
|
||||
|
|
@ -24,6 +25,10 @@ const tmpDirs = [];
|
|||
|
||||
afterEach(() => {
|
||||
closeDatabase();
|
||||
if (!isDbAvailable()) {
|
||||
openDatabase(":memory:");
|
||||
closeDatabase();
|
||||
}
|
||||
invalidateStateCache();
|
||||
while (tmpDirs.length > 0) {
|
||||
const dir = tmpDirs.pop();
|
||||
|
|
@ -239,3 +244,34 @@ test("deriveState_when_task_summary_exists_keeps_db_task_status_authoritative",
|
|||
assert.equal(firstTask.id, "T02");
|
||||
assert.equal(firstTask.status, "pending");
|
||||
});
|
||||
|
||||
// A project that has only legacy markdown artifacts (a milestone roadmap on
// disk) and a failed DB bootstrap must surface the blocked recovery state —
// not silently re-derive authority from markdown.
test("deriveState_when_db_bootstrap_failed_refuses_legacy_markdown_runtime_fallback", async () => {
  const project = mkdtempSync(join(tmpdir(), "sf-db-runtime-state-"));
  tmpDirs.push(project);
  // Legacy artifact: a milestone roadmap file, no DB.
  const milestoneDir = join(project, ".sf", "milestones", "M780");
  mkdirSync(milestoneDir, { recursive: true });
  writeFileSync(
    join(milestoneDir, "M780-ROADMAP.md"),
    [
      "# M780: legacy roadmap only",
      "",
      "## Slice Overview",
      "| ID | Slice | Risk | Depends | Done | After this |",
      "|----|-------|------|---------|------|------------|",
      "| S01 | Should require recovery | low | - | | migrate first |",
      "",
    ].join("\n"),
  );

  // Attempt DB bootstrap; a throw is tolerated — the scenario only needs a
  // bootstrap attempt to have happened before deriveState runs.
  try {
    openDatabase(join(project, ".sf"));
  } catch {}

  const state = await deriveState(project);

  // The derived state must be blocked with recovery guidance, not a
  // markdown-derived active milestone.
  assert.equal(state.phase, "blocked");
  assert.equal(state.activeMilestone, null);
  assert.match(state.blockers[0], /does not fall back to markdown authority/i);
  assert.match(state.nextAction, /sf recover/i);
  assert.match(state.nextAction, /sf migrate/i);
});
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import assert from "node:assert/strict";
|
|||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { test } from "vitest";
|
||||
|
||||
import guardrails from "../../guardrails/index.js";
|
||||
import {
|
||||
DIRECT_SF_COMMAND_NAMES,
|
||||
getSfTopLevelCommandCompletions,
|
||||
|
|
@ -81,3 +81,27 @@ test("human_facing_cli_help_when_describing_sf_surfaces_uses_direct_commands", (
|
|||
);
|
||||
}
|
||||
});
|
||||
|
||||
// The guardrails extension must expose its safe-git surface as slash
// commands: each expected command gets a handler and a description that
// names the slash form.
test("guardrails_registers_safegit_as_slash_command_surface", () => {
  const commands = new Map();
  // Minimal fake Pi host: capture registrations, ignore event hooks.
  const pi = {
    registerCommand(name, options) {
      commands.set(name, options);
    },
    on() {},
  };

  guardrails(pi);

  for (const commandName of [
    "safegit",
    "safegit-level",
    "safegit-status",
    "yolo",
  ]) {
    const command = commands.get(commandName);
    assert.equal(typeof command?.handler, "function");
    // Description must reference the slash form (e.g. "/safegit") and
    // identify itself as a slash command.
    assert.match(command.description, new RegExp(`/${commandName}\\b`));
    assert.match(command.description, /Slash command/);
  }
});
|
||||
|
|
|
|||
|
|
@ -1,14 +1,20 @@
|
|||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
getMetricsSystemStats,
|
||||
getMetricsText,
|
||||
getSystemPerformanceDashboard,
|
||||
initMetricsCentral,
|
||||
queryMetrics,
|
||||
recordCost,
|
||||
recordCounter,
|
||||
recordDatabaseOperation,
|
||||
recordGauge,
|
||||
recordHistogram,
|
||||
recordModelRequest,
|
||||
recordToolExecution,
|
||||
registerMetricMeta,
|
||||
stopMetricsCentral,
|
||||
updateResourceGauges,
|
||||
} from "../metrics-central.js";
|
||||
|
||||
describe("metrics-central", () => {
|
||||
|
|
@ -119,4 +125,70 @@ describe("metrics-central", () => {
|
|||
const results = queryMetrics(null, "sess-1", "sf_test");
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
|
||||
it("dashboard_reads_counter_gauge_and_histogram_values", () => {
|
||||
recordCost("unit-dashboard", "model-dashboard", 10, 20, 0.5, "build");
|
||||
recordToolExecution("read", 30);
|
||||
recordModelRequest("model-dashboard", 40);
|
||||
recordDatabaseOperation("select", 50);
|
||||
updateResourceGauges({
|
||||
activeSessions: 1,
|
||||
activeAgents: 2,
|
||||
concurrentToolCalls: 3,
|
||||
});
|
||||
|
||||
const dashboard = getSystemPerformanceDashboard();
|
||||
expect(dashboard.cost).toBeGreaterThanOrEqual(0.5);
|
||||
expect(dashboard.tokens.input).toBeGreaterThanOrEqual(10);
|
||||
expect(dashboard.tokens.output).toBeGreaterThanOrEqual(20);
|
||||
expect(dashboard.performance.averageToolExecution).toBeGreaterThanOrEqual(
|
||||
30,
|
||||
);
|
||||
expect(dashboard.performance.averageModelRequest).toBeGreaterThanOrEqual(
|
||||
40,
|
||||
);
|
||||
expect(dashboard.performance.averageDatabaseQuery).toBeGreaterThanOrEqual(
|
||||
50,
|
||||
);
|
||||
expect(dashboard.resources.activeSessions).toBe(1);
|
||||
expect(dashboard.resources.activeAgents).toBe(2);
|
||||
expect(dashboard.resources.concurrentToolCalls).toBe(3);
|
||||
expect(getMetricsSystemStats().databaseStatus).toBe("disconnected");
|
||||
});
|
||||
|
||||
it("stopMetricsCentral_persists_metrics_to_db_adapter", () => {
|
||||
const rows = [];
|
||||
const db = {
|
||||
exec() {},
|
||||
prepare(sql) {
|
||||
if (sql.startsWith("INSERT")) {
|
||||
return {
|
||||
run(name, type, labels, value, timestamp, sessionId) {
|
||||
rows.push({ name, type, labels, value, timestamp, sessionId });
|
||||
},
|
||||
};
|
||||
}
|
||||
throw new Error(`unexpected SQL: ${sql}`);
|
||||
},
|
||||
};
|
||||
initMetricsCentral("/tmp/test-project", {
|
||||
dbAdapter: db,
|
||||
sessionId: "sess-db",
|
||||
});
|
||||
recordCounter("sf_test_db_counter", { label: "a=b,c" }, 2);
|
||||
|
||||
stopMetricsCentral();
|
||||
|
||||
expect(rows).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
name: "sf_test_db_counter",
|
||||
type: "counter",
|
||||
labels: JSON.stringify({ label: "a=b,c", session_id: "sess-db" }),
|
||||
value: 2,
|
||||
sessionId: "sess-db",
|
||||
}),
|
||||
]),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
|
||||
describe("SF hook bootstrap", () => {
  // Importing the hook-registration module must be side-effect free with
  // respect to the runtime Pi instance: it only needs to export the two
  // entry points checked here.
  it("register_hooks_module_imports_without_touching_runtime_pi", async () => {
    const mod = await import("../bootstrap/register-hooks.js");
    expect(typeof mod.registerHooks).toBe("function");
    expect(typeof mod.runAgentEndMemoryBackfill).toBe("function");
  });
});
|
||||
|
|
@ -5,7 +5,13 @@
|
|||
* automatically when later backfills depend on newly introduced columns.
|
||||
*/
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
||||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readdirSync,
|
||||
rmSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { DatabaseSync } from "node:sqlite";
|
||||
|
|
@ -217,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
|
|||
const version = db
|
||||
.prepare("SELECT MAX(version) AS version FROM schema_version")
|
||||
.get();
|
||||
assert.equal(version.version, 45);
|
||||
assert.equal(version.version, 49);
|
||||
const taskSpec = db
|
||||
.prepare(
|
||||
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",
|
||||
|
|
@ -257,6 +263,24 @@ test("openDatabase_when_fresh_db_supports_schedule_entries", () => {
|
|||
assert.deepEqual(rows[0].payload, { message: "check DB schedule" });
|
||||
});
|
||||
|
||||
// Opening a file-backed DB must leave exactly one snapshot under
// .sf/backups/db plus the maintenance marker file.
test("openDatabase_when_file_backed_creates_db_snapshot_and_maintenance_marker", () => {
  const dir = mkdtempSync(join(tmpdir(), "sf-db-backup-"));
  tmpDirs.push(dir);
  const sfDir = join(dir, ".sf");
  mkdirSync(sfDir, { recursive: true });
  const dbPath = join(sfDir, "sf.db");

  assert.equal(openDatabase(dbPath), true);
  closeDatabase();

  // Snapshot files are named with an "sf.db." prefix inside backups/db.
  const backupDir = join(sfDir, "backups", "db");
  const backups = readdirSync(backupDir).filter((name) =>
    name.startsWith("sf.db."),
  );
  assert.equal(backups.length, 1);
  assert.equal(existsSync(join(backupDir, "maintenance.json")), true);
});
|
||||
|
||||
test("openDatabase_when_fresh_db_supports_gate_run_micro_usd", () => {
|
||||
assert.equal(openDatabase(":memory:"), true);
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import {
|
|||
import {
|
||||
getModelInvocableSkills,
|
||||
getPermittedSkills,
|
||||
getUserInvocableSkills,
|
||||
loadSkills,
|
||||
} from "../skills/loader.js";
|
||||
|
||||
|
|
@ -250,4 +251,32 @@ describe("skill loading", () => {
|
|||
expect(buildSkills.some((s) => s.name === "review-skill")).toBe(false);
|
||||
expect(buildSkills.some((s) => s.name === "user-only")).toBe(false);
|
||||
});
|
||||
|
||||
test("getUserInvocableSkills_shows_bundled_runtime_skills_only", () => {
|
||||
createSkill("human-facing", { userInvocable: true });
|
||||
createSkill("autoresearch", { userInvocable: false });
|
||||
const badDir = join(tmpDir, ".agents", "skills", "droid-evolved");
|
||||
mkdirSync(badDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(badDir, "SKILL.md"),
|
||||
`---\nname: droid-evolved\ndescription: Workflow-only skill\nuser-invocable: true\n---\n\n# Invalid\n`,
|
||||
);
|
||||
|
||||
const visible = getUserInvocableSkills([
|
||||
...loadSkills(tmpDir),
|
||||
{
|
||||
name: "bundled-human-writing",
|
||||
source: "bundled",
|
||||
valid: true,
|
||||
userInvocable: true,
|
||||
},
|
||||
{
|
||||
name: "project-forge-command-surface",
|
||||
source: "project",
|
||||
valid: true,
|
||||
userInvocable: true,
|
||||
},
|
||||
]);
|
||||
expect(visible.map((s) => s.name)).toEqual(["bundled-human-writing"]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -22,6 +22,12 @@ vi.mock("../sf-db.js", () => ({
|
|||
import * as memoryStore from "../memory-store.js";
|
||||
import * as sfDb from "../sf-db.js";
|
||||
|
||||
// Assert that the mocked createMemory was called with an object containing
// at least `fields` (partial match via expect.objectContaining), matching
// the object-argument createMemory({ ... }) API.
function expectMemoryCreate(fields) {
  expect(memoryStore.createMemory).toHaveBeenCalledWith(
    expect.objectContaining(fields),
  );
}
|
||||
|
||||
describe("UOK Memory Integration", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
|
|
@ -36,11 +42,14 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, status, result);
|
||||
|
||||
expect(memoryStore.createMemory).toHaveBeenCalledWith(
|
||||
"pattern",
|
||||
"Unit type 'execute-task' succeeded with outcome: all tests passed",
|
||||
0.9,
|
||||
);
|
||||
expectMemoryCreate({
|
||||
category: "pattern",
|
||||
content:
|
||||
"Unit type 'execute-task' succeeded with outcome: all tests passed",
|
||||
confidence: 0.9,
|
||||
source_unit_type: "execute-task",
|
||||
source_unit_id: "M001-S01-T01",
|
||||
});
|
||||
});
|
||||
|
||||
it("records_failed_unit_completion_as_pattern", async () => {
|
||||
|
|
@ -50,11 +59,14 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, status, result);
|
||||
|
||||
expect(memoryStore.createMemory).toHaveBeenCalledWith(
|
||||
"pattern",
|
||||
"Unit type 'research-slice' failed with status: failed (timeout after 5 minutes)",
|
||||
0.5,
|
||||
);
|
||||
expectMemoryCreate({
|
||||
category: "pattern",
|
||||
content:
|
||||
"Unit type 'research-slice' failed with status: failed (timeout after 5 minutes)",
|
||||
confidence: 0.5,
|
||||
source_unit_type: "research-slice",
|
||||
source_unit_id: "M001-S02",
|
||||
});
|
||||
});
|
||||
|
||||
it("uses_lower_confidence_for_failures", async () => {
|
||||
|
|
@ -66,8 +78,10 @@ describe("UOK Memory Integration", () => {
|
|||
await successCall();
|
||||
await failureCall();
|
||||
|
||||
const successConfidence = memoryStore.createMemory.mock.calls[0][2];
|
||||
const failureConfidence = memoryStore.createMemory.mock.calls[1][2];
|
||||
const successConfidence =
|
||||
memoryStore.createMemory.mock.calls[0][0].confidence;
|
||||
const failureConfidence =
|
||||
memoryStore.createMemory.mock.calls[1][0].confidence;
|
||||
|
||||
expect(successConfidence).toBe(0.9);
|
||||
expect(failureConfidence).toBe(0.5);
|
||||
|
|
@ -105,11 +119,11 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, status, result);
|
||||
|
||||
expect(memoryStore.createMemory).toHaveBeenCalledWith(
|
||||
"pattern",
|
||||
expect.stringContaining("blocked"),
|
||||
0.5,
|
||||
);
|
||||
expectMemoryCreate({
|
||||
category: "pattern",
|
||||
content: expect.stringContaining("blocked"),
|
||||
confidence: 0.5,
|
||||
});
|
||||
});
|
||||
|
||||
it("handles_stale_status_as_failure", async () => {
|
||||
|
|
@ -119,11 +133,11 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, status, result);
|
||||
|
||||
expect(memoryStore.createMemory).toHaveBeenCalledWith(
|
||||
"pattern",
|
||||
expect.stringContaining("stale"),
|
||||
0.5,
|
||||
);
|
||||
expectMemoryCreate({
|
||||
category: "pattern",
|
||||
content: expect.stringContaining("stale"),
|
||||
confidence: 0.5,
|
||||
});
|
||||
});
|
||||
|
||||
it("extracts_unitType_from_unitType_property", async () => {
|
||||
|
|
@ -132,11 +146,12 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, status, {});
|
||||
|
||||
expect(memoryStore.createMemory).toHaveBeenCalledWith(
|
||||
"pattern",
|
||||
expect.stringContaining("plan-milestone"),
|
||||
0.9,
|
||||
);
|
||||
expectMemoryCreate({
|
||||
category: "pattern",
|
||||
content: expect.stringContaining("plan-milestone"),
|
||||
confidence: 0.9,
|
||||
source_unit_type: "plan-milestone",
|
||||
});
|
||||
});
|
||||
|
||||
it("defaults_to_unknown_if_no_type_provided", async () => {
|
||||
|
|
@ -145,11 +160,12 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, status, {});
|
||||
|
||||
expect(memoryStore.createMemory).toHaveBeenCalledWith(
|
||||
"pattern",
|
||||
expect.stringContaining("unknown"),
|
||||
0.9,
|
||||
);
|
||||
expectMemoryCreate({
|
||||
category: "pattern",
|
||||
content: expect.stringContaining("unknown"),
|
||||
confidence: 0.9,
|
||||
source_unit_type: "unknown",
|
||||
});
|
||||
});
|
||||
|
||||
it("uses_status_as_outcome_when_result_outcome_missing", async () => {
|
||||
|
|
@ -159,11 +175,11 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, status, result);
|
||||
|
||||
expect(memoryStore.createMemory).toHaveBeenCalledWith(
|
||||
"pattern",
|
||||
"Unit type 'test-unit' succeeded with outcome: completed",
|
||||
0.9,
|
||||
);
|
||||
expectMemoryCreate({
|
||||
category: "pattern",
|
||||
content: "Unit type 'test-unit' succeeded with outcome: completed",
|
||||
confidence: 0.9,
|
||||
});
|
||||
});
|
||||
|
||||
it("categorizes_all_patterns_as_pattern_category", async () => {
|
||||
|
|
@ -174,7 +190,7 @@ describe("UOK Memory Integration", () => {
|
|||
await recordUnitOutcomeInMemory(unit, "blocked", {});
|
||||
|
||||
for (const call of memoryStore.createMemory.mock.calls) {
|
||||
expect(call[0]).toBe("pattern");
|
||||
expect(call[0].category).toBe("pattern");
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -186,8 +202,8 @@ describe("UOK Memory Integration", () => {
|
|||
});
|
||||
await recordUnitOutcomeInMemory(unit, "failed", { error: "auth failed" });
|
||||
|
||||
const call1 = memoryStore.createMemory.mock.calls[0][1];
|
||||
const call2 = memoryStore.createMemory.mock.calls[1][1];
|
||||
const call1 = memoryStore.createMemory.mock.calls[0][0].content;
|
||||
const call2 = memoryStore.createMemory.mock.calls[1][0].content;
|
||||
|
||||
expect(call1).toContain("timeout");
|
||||
expect(call2).toContain("auth failed");
|
||||
|
|
@ -210,7 +226,7 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, "completed", {});
|
||||
|
||||
const categoryArg = memoryStore.createMemory.mock.calls[0][0];
|
||||
const categoryArg = memoryStore.createMemory.mock.calls[0][0].category;
|
||||
expect(categoryArg).toBe("pattern");
|
||||
});
|
||||
|
||||
|
|
@ -220,7 +236,7 @@ describe("UOK Memory Integration", () => {
|
|||
|
||||
await recordUnitOutcomeInMemory(unit, "completed", result);
|
||||
|
||||
const pattern = memoryStore.createMemory.mock.calls[0][1];
|
||||
const pattern = memoryStore.createMemory.mock.calls[0][0].content;
|
||||
expect(pattern).toMatch(/code-review/);
|
||||
expect(pattern).toMatch(/succeeded/);
|
||||
expect(pattern).toMatch(/3 files reviewed/);
|
||||
|
|
@ -260,7 +276,7 @@ describe("UOK Memory Integration", () => {
|
|||
outcome: "very specific outcome",
|
||||
});
|
||||
|
||||
const pattern = memoryStore.createMemory.mock.calls[0][1];
|
||||
const pattern = memoryStore.createMemory.mock.calls[0][0].content;
|
||||
expect(pattern).toContain("specific-task-type");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -271,6 +271,58 @@ test("writeUnitRuntimeRecord_merges_updates", () => {
|
|||
assert.equal(record.progressCount, 2);
|
||||
});
|
||||
|
||||
test("writeUnitRuntimeRecord_when_lineage_event_started_persists_current_worker", () => {
|
||||
const root = makeProject();
|
||||
const record = writeUnitRuntimeRecord(
|
||||
root,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
Date.now(),
|
||||
{
|
||||
status: "running",
|
||||
lineageEvent: {
|
||||
status: "started",
|
||||
workerSessionId: "worker-1",
|
||||
spawnId: "spawn-1",
|
||||
ts: "2026-05-08T00:00:00.000Z",
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.equal(record.lineage.status, "started");
|
||||
assert.equal(record.lineage.currentWorkerSessionId, "worker-1");
|
||||
assert.deepEqual(record.lineage.workerSessionIds, ["worker-1"]);
|
||||
assert.equal(record.lineage.events[0].spawnId, "spawn-1");
|
||||
});
|
||||
|
||||
test("writeUnitRuntimeRecord_when_lineage_event_completed_clears_current_worker", () => {
|
||||
const root = makeProject();
|
||||
const t = Date.now();
|
||||
writeUnitRuntimeRecord(root, "execute-task", "M001/S01/T01", t, {
|
||||
status: "running",
|
||||
lineageEvent: {
|
||||
status: "started",
|
||||
workerSessionId: "worker-1",
|
||||
},
|
||||
});
|
||||
const record = writeUnitRuntimeRecord(
|
||||
root,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
t,
|
||||
{
|
||||
status: "completed",
|
||||
lineageEvent: {
|
||||
status: "completed",
|
||||
workerSessionId: "worker-1",
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.equal(record.lineage.status, "completed");
|
||||
assert.equal(record.lineage.currentWorkerSessionId, null);
|
||||
assert.equal(record.lineage.completedWorkerSessionId, "worker-1");
|
||||
assert.equal(record.lineage.events.length, 2);
|
||||
});
|
||||
|
||||
test("writeUnitRuntimeRecord_sanitizes_path_characters", () => {
|
||||
const root = makeProject();
|
||||
writeUnitRuntimeRecord(root, "exec/task", "M001/S01/T01", Date.now(), {
|
||||
|
|
|
|||
|
|
@ -23,6 +23,11 @@ export {
|
|||
USER_SKILL_DIR,
|
||||
validateSkillFrontmatter,
|
||||
} from "../skills/index.js";
|
||||
export {
|
||||
assessAssertionCoverage,
|
||||
fulfilledAssertionIdsFromHandoff,
|
||||
requiredAssertionIdsFromContract,
|
||||
} from "./assertion-coverage.js";
|
||||
// ─── Audit & Observability ─────────────────────────────────────────────────
|
||||
export { buildAuditEnvelope, emitUokAuditEvent } from "./audit.js";
|
||||
export {
|
||||
|
|
@ -32,6 +37,11 @@ export {
|
|||
|
||||
// ─── Gates ─────────────────────────────────────────────────────────────────
|
||||
export { ChaosMonkey, ChaosMonkeyGate } from "./chaos-monkey.js";
|
||||
// ─── Model Policy ──────────────────────────────────────────────────────────
|
||||
export {
|
||||
buildWorkerContextPackProjection,
|
||||
validateWorkerContextPackProjection,
|
||||
} from "./context-pack-projection.js";
|
||||
// ─── Contracts & Types ────────────────────────────────────────────────────
|
||||
export { validateGate } from "./contracts.js";
|
||||
// ─── Coordination Store ───────────────────────────────────────────────────
|
||||
|
|
@ -48,7 +58,6 @@ export {
|
|||
} from "./diagnostic-synthesis.js";
|
||||
// ─── Dispatch Envelope ─────────────────────────────────────────────────────
|
||||
export { buildDispatchEnvelope, explainDispatch } from "./dispatch-envelope.js";
|
||||
|
||||
// ─── Execution Graph ───────────────────────────────────────────────────────
|
||||
export {
|
||||
buildExecutionGraphSnapshot,
|
||||
|
|
@ -92,8 +101,25 @@ export {
|
|||
readUokMetrics,
|
||||
writeUokMetrics,
|
||||
} from "./metrics-exposition.js";
|
||||
// ─── Model Policy ──────────────────────────────────────────────────────────
|
||||
export { applyModelPolicyFilter } from "./model-policy.js";
|
||||
export {
|
||||
DEFAULT_MODEL_ROLE_CONSTRAINTS,
|
||||
ModelRolePolicyValidationError,
|
||||
normalizeRolePolicies,
|
||||
normalizeRolePolicy,
|
||||
SUPPORTED_MODEL_ROLE_CONSTRAINTS,
|
||||
SUPPORTED_MODEL_ROLES,
|
||||
validateRolePolicy,
|
||||
} from "./model-role-policy.js";
|
||||
export {
|
||||
emitModelAutoResolvedEvent,
|
||||
modelRoleForUnitType,
|
||||
} from "./model-route-evidence.js";
|
||||
export {
|
||||
buildModelRouteSnapshot,
|
||||
redactModelConfigSecrets,
|
||||
sanitizeModelRouteSnapshot,
|
||||
} from "./model-route-snapshot.js";
|
||||
export { MultiPackageGate } from "./multi-package-gate.js";
|
||||
export { OutcomeLearningGate } from "./outcome-learning-gate.js";
|
||||
export { signalKernelEnter as signalParityEnter } from "./parity-diff-capture.js";
|
||||
|
|
@ -119,6 +145,11 @@ export {
|
|||
isExecutionEntryPhase,
|
||||
isMissingFinalizedContextResult,
|
||||
} from "./plan-v2.js";
|
||||
export {
|
||||
buildUokProgressEvent,
|
||||
UOK_PROGRESS_EVENT_TYPES,
|
||||
validateUokProgressEvent,
|
||||
} from "./progress-event.js";
|
||||
// ─── Scheduler v2 (Background Work) ────────────────────────────────────────
|
||||
export {
|
||||
CancellationToken,
|
||||
|
|
@ -138,6 +169,16 @@ export {
|
|||
TASK_TERMINAL_STATES,
|
||||
unitRuntimeToTaskState,
|
||||
} from "./task-state.js";
|
||||
export {
|
||||
normalizeCommandRegistry,
|
||||
normalizeToolCommandRegistry,
|
||||
validateCommandRegistry,
|
||||
validateToolCommandRegistry,
|
||||
} from "./tool-command-registry.js";
|
||||
export {
|
||||
normalizeUnitLineage,
|
||||
recordUnitLineageEvent,
|
||||
} from "./unit-lineage.js";
|
||||
// ─── Unit Runtime ──────────────────────────────────────────────────────────
|
||||
export {
|
||||
clearUnitRuntimeRecord,
|
||||
|
|
|
|||
|
|
@ -23,6 +23,10 @@ import {
|
|||
} from "../paths.js";
|
||||
import { getSlice, isDbAvailable } from "../sf-db.js";
|
||||
import { parseUnitId } from "../unit-id.js";
|
||||
import {
|
||||
normalizeUnitLineage,
|
||||
recordUnitLineageEvent,
|
||||
} from "./unit-lineage.js";
|
||||
/**
|
||||
* Lists every unit runtime projection status in UOK lifecycle order.
|
||||
*
|
||||
|
|
@ -177,7 +181,13 @@ export async function recordUnitOutcomeInMemory(unit, status, result) {
|
|||
? `Unit type '${unitType}' succeeded with outcome: ${outcome}`
|
||||
: `Unit type '${unitType}' failed with status: ${status} (${result?.error || "no error info"})`;
|
||||
|
||||
await createMemory("pattern", pattern, confidence);
|
||||
await createMemory({
|
||||
category: "pattern",
|
||||
content: pattern,
|
||||
confidence,
|
||||
source_unit_type: unitType,
|
||||
source_unit_id: unit.id ?? unit.unitId ?? null,
|
||||
});
|
||||
} catch (_err) {
|
||||
// Degrade gracefully - memory failures do not block UOK
|
||||
}
|
||||
|
|
@ -363,6 +373,15 @@ export function writeUnitRuntimeRecord(
|
|||
: hasUpdate(updates, "recoveryAttempts")
|
||||
? (updates.recoveryAttempts ?? 0)
|
||||
: (prev?.retryCount ?? recoveryAttempts ?? 0);
|
||||
const lineage = updates.lineageEvent
|
||||
? recordUnitLineageEvent(prev?.lineage ?? { unitType, unitId }, {
|
||||
unitType,
|
||||
unitId,
|
||||
...updates.lineageEvent,
|
||||
})
|
||||
: updates.lineage
|
||||
? normalizeUnitLineage({ unitType, unitId, ...updates.lineage })
|
||||
: prev?.lineage;
|
||||
const next = {
|
||||
version: 1,
|
||||
unitType,
|
||||
|
|
@ -407,6 +426,7 @@ export function writeUnitRuntimeRecord(
|
|||
DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
|
||||
lastRecoveryReason: updates.lastRecoveryReason ?? prev?.lastRecoveryReason,
|
||||
runawayGuardPause: updates.runawayGuardPause ?? prev?.runawayGuardPause,
|
||||
...(lineage ? { lineage } : {}),
|
||||
};
|
||||
writeFileSync(path, JSON.stringify(next, null, 2) + "\n", "utf-8");
|
||||
_runtimeCache.set(path, next);
|
||||
|
|
|
|||
59
src/resources/skills/autoresearch/SKILL.md
Normal file
59
src/resources/skills/autoresearch/SKILL.md
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
---
|
||||
name: autoresearch
|
||||
description: Run an SF-native metric optimization loop. Use inside autonomous workflow when a task has a measurable target such as runtime, bundle size, test duration, model quality, or benchmark score.
|
||||
---
|
||||
|
||||
# Autoresearch
|
||||
|
||||
Use this skill when SF should improve a measurable metric by trying one focused
|
||||
change at a time, measuring it, keeping wins, and discarding regressions.
|
||||
|
||||
This is a workflow skill. Do not turn it into a user-facing guide. Produce small
|
||||
state files that another SF unit can resume.
|
||||
|
||||
## Required Inputs
|
||||
|
||||
- Goal: what metric should improve.
|
||||
- Command: how to measure it.
|
||||
- Direction: `lower` or `higher`.
|
||||
- Scope: files or packages allowed to change.
|
||||
- Constraints: tests, deps, compatibility, time budget, and off-limits paths.
|
||||
|
||||
Infer these from repo context when possible. Ask only when the metric or command
|
||||
is unknowable.
|
||||
|
||||
## State Files
|
||||
|
||||
Create or maintain these at repo root unless the project already has a better
|
||||
experiment directory:
|
||||
|
||||
- `autoresearch.md`: objective, metric, command, scope, constraints, current
|
||||
best, and what has been tried.
|
||||
- `autoresearch.sh`: fast reproducible measurement script. It must print
|
||||
`METRIC name=value`.
|
||||
- `autoresearch.jsonl`: one JSON object per experiment.
|
||||
- `autoresearch.checks.sh`: optional correctness backpressure.
|
||||
|
||||
Keep `autoresearch.md` terse. It is a resume surface, not a report.
|
||||
|
||||
## Loop
|
||||
|
||||
1. Read existing state and establish the current best metric.
|
||||
2. Choose one hypothesis.
|
||||
3. Edit only in scope.
|
||||
4. Run `timeout 600 bash autoresearch.sh`.
|
||||
5. Run `timeout 300 bash autoresearch.checks.sh` if it exists.
|
||||
6. Log result as `keep`, `discard`, `crash`, or `checks_failed`.
|
||||
7. Keep improvements. Revert focused regressions.
|
||||
8. Update `autoresearch.md` with one line per meaningful lesson.
|
||||
|
||||
Stop only when the stated budget or target is reached, the user interrupts, or
|
||||
the repo is no longer in a safe state to continue.
|
||||
|
||||
## SF Integration
|
||||
|
||||
- Treat each experiment as one autonomous unit with a single hypothesis.
|
||||
- Prefer cheap checks before expensive benchmarks.
|
||||
- Use SF/UOK gates when available for commit and parity safety.
|
||||
- Do not write long research narratives. Preserve enough evidence to resume and
|
||||
audit the decision.
|
||||
44
src/resources/skills/human-writing/SKILL.md
Normal file
44
src/resources/skills/human-writing/SKILL.md
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
---
|
||||
name: human-writing
|
||||
description: Write and edit concise human-readable docs, plans, records, PR text, and handoffs. Use when creating or revising documentation or other prose that should be sparse, direct, and low-context.
|
||||
---
|
||||
|
||||
# Human Writing
|
||||
|
||||
Use this skill for prose that humans will read later: docs, plans, records,
|
||||
handoffs, PR notes, and status summaries.
|
||||
|
||||
## Default Style
|
||||
|
||||
- Keep it sparse. Prefer the shortest version that preserves decisions,
|
||||
evidence, commands, and next actions.
|
||||
- Write like an engineer leaving a useful note for another engineer.
|
||||
- Use concrete nouns and exact file, command, model, endpoint, date, or runtime
|
||||
names when they matter.
|
||||
- Remove filler, generic framing, hype, and recap paragraphs that do not change
|
||||
what the reader can do.
|
||||
- Preserve uncertainty honestly. Say what is known, what is inferred, and what
|
||||
still needs verification.
|
||||
- Prefer bullets for scan-heavy material. Prefer short paragraphs for context or
|
||||
rationale.
|
||||
|
||||
## Docs Context Budget
|
||||
|
||||
When editing docs, reduce future context load:
|
||||
|
||||
- Keep root docs and agent instructions as routing maps, not full doctrine.
|
||||
- Move deep detail into narrowly named reference docs only when it will be reused.
|
||||
- Delete duplicated explanations instead of rephrasing them in multiple places.
|
||||
- Prefer links to canonical docs over pasted summaries.
|
||||
- Keep generated or temporary research out of hand-maintained docs unless it has
|
||||
become a durable decision.
|
||||
|
||||
## Rewrite Pass
|
||||
|
||||
Before finishing prose, do one compression pass:
|
||||
|
||||
1. Delete throat-clearing and obvious statements.
|
||||
2. Collapse repeated ideas into one canonical sentence.
|
||||
3. Replace broad claims with observed facts.
|
||||
4. Keep only examples that prevent likely misuse.
|
||||
5. End with the current state and the next useful action, if there is one.
|
||||
58
src/resources/skills/sf-wiki/SKILL.md
Normal file
58
src/resources/skills/sf-wiki/SKILL.md
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
---
|
||||
name: sf-wiki
|
||||
description: Build a sparse SF-native codebase wiki or subsystem map. Use inside autonomous workflow when SF needs durable repo orientation, architecture maps, or generated reference docs.
|
||||
---
|
||||
|
||||
# SF Wiki
|
||||
|
||||
Use this skill to generate compact repository documentation for future agents and
|
||||
humans. The output is an SF-native codebase map, not a Factory upload.
|
||||
|
||||
## Output
|
||||
|
||||
Default location:
|
||||
|
||||
- `.sf/wiki/` for runtime working context.
|
||||
- `docs/generated/wiki/` only when the user wants tracked generated docs.
|
||||
|
||||
Prefer `.sf/wiki/` for autonomous workflow context so tracked docs stay sparse.
|
||||
|
||||
## Survey
|
||||
|
||||
Use two passes:
|
||||
|
||||
1. Structural: README, AGENTS, package manifests, build/test config, docs,
|
||||
entrypoints, CI, and top-level directories.
|
||||
2. Source: routes, commands, providers, services, workflows, tests, generated
|
||||
artifacts, and feature flags.
|
||||
|
||||
Reconcile source-derived topics with directory enumeration. If a non-trivial
|
||||
directory is skipped, record the reason in one sentence.
|
||||
|
||||
## Pages
|
||||
|
||||
Keep the wiki small by default:
|
||||
|
||||
- `index.md`: what this repo is, how to run it, where to start.
|
||||
- `architecture.md`: major subsystems and data/control flow.
|
||||
- `workflows.md`: build, test, release, autonomous/SF flows.
|
||||
- `subsystems.md`: table of subsystem, path, purpose, owner signal, tests.
|
||||
- `glossary.md`: project terms only.
|
||||
|
||||
Add subsystem pages only when the table would become unreadable.
|
||||
|
||||
## Style
|
||||
|
||||
- Optimize for context reload, not publication.
|
||||
- Use bullets and tables.
|
||||
- Link to canonical docs instead of copying them.
|
||||
- Avoid prose history unless it changes how someone should work.
|
||||
- Mark stale or uncertain facts explicitly.
|
||||
|
||||
## Verification
|
||||
|
||||
Before finishing:
|
||||
|
||||
- Check every referenced path exists or is intentionally historical.
|
||||
- Run cheap repo discovery commands again if files changed during the scan.
|
||||
- Leave a short freshness note with date, commit, and commands used.
|
||||
|
|
@ -45,6 +45,8 @@ test("buildAutoBootstrapContext includes purpose docs and source inventory", ()
|
|||
assert.match(context, /ACE spec-first TDD/);
|
||||
assert.match(context, /explorer-style subagents/);
|
||||
assert.match(context, /harness-engineering principles/);
|
||||
assert.match(context, /review\/export or recovery surfaces/);
|
||||
assert.match(context, /canonical structured runtime state/);
|
||||
assert.match(context, /## \.sf\/PROJECT\.md/);
|
||||
assert.match(context, /## VISION\.md/);
|
||||
assert.match(context, /## TODO\.md/);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,13 @@
|
|||
import { describe, expect, it, beforeEach, afterEach } from "vitest";
|
||||
import { mkdtempSync, rmSync, readFileSync, existsSync, readdirSync } from "node:fs";
|
||||
import {
|
||||
existsSync,
|
||||
mkdtempSync,
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { configureLogger, getLogger, resetLoggerConfig } from "../logger.js";
|
||||
|
||||
describe("logger", () => {
|
||||
|
|
@ -64,7 +70,9 @@ describe("logger", () => {
|
|||
log.info("key is sk-ant-abc123def456");
|
||||
// The customSink receives record.message where pattern redaction
|
||||
// has already been applied (via buildRedactingSink).
|
||||
expect(logs.some((l) => l.includes("[REDACTED]") && !l.includes("sk-ant-"))).toBe(true);
|
||||
expect(
|
||||
logs.some((l) => l.includes("[REDACTED]") && !l.includes("sk-ant-")),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("home directory paths are redacted to ~", async () => {
|
||||
|
|
@ -82,7 +90,9 @@ describe("logger", () => {
|
|||
// The customSink receives record.message where pattern redaction
|
||||
// has already been applied (via buildRedactingSink).
|
||||
expect(logs.some((l) => l.includes("~/projects/foo"))).toBe(true);
|
||||
expect(logs.some((l) => l.includes(home) && !l.includes("~"))).toBe(false);
|
||||
expect(logs.some((l) => l.includes(home) && !l.includes("~"))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -150,12 +160,14 @@ describe("logger", () => {
|
|||
mode: "autonomous",
|
||||
logDir: tmpDir,
|
||||
customSink: (record) => {
|
||||
lines.push(JSON.stringify({
|
||||
ts: record.timestamp,
|
||||
level: record.level,
|
||||
category: record.category,
|
||||
message: record.message,
|
||||
}));
|
||||
lines.push(
|
||||
JSON.stringify({
|
||||
ts: record.timestamp,
|
||||
level: record.level,
|
||||
category: record.category,
|
||||
message: record.message,
|
||||
}),
|
||||
);
|
||||
},
|
||||
});
|
||||
const log = getLogger("sf.autonomous");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue