feat: improve sf runtime self-reload and safeguards

This commit is contained in:
Mikael Hugo 2026-05-08 23:52:35 +02:00
parent c5e9e4f9c8
commit e4c951ff0c
44 changed files with 3411 additions and 146 deletions

View file

@ -0,0 +1,43 @@
# UOK Droid Accounting Integration
## Purpose
Capture the useful Droid mission-accounting patterns as SF-native UOK contracts without importing Droid's model configuration, file-first state model, or mission runtime shape.
## Adopted Value
- Role-based model policy: durable intent is `auto` plus symbolic constraints, never concrete provider/model IDs.
- Model route evidence: runtime-selected provider/model is recorded as evidence after SF's existing auto selector runs.
- Secret-safe model route snapshots: reproducibility metadata is retained while API keys, headers, and tokens are redacted.
- Typed progress events: stable machine event names such as `unit_selected`, `model_auto_resolved`, `unit_completed`, and `unit_blocked`.
- Structured unit handoff: closeout fields for changed files, tests, commands, failures, leftover work, verification status, and fulfilled assertions.
- Assertion coverage: compare required validation assertions with fulfilled handoff assertions.
- Worker lineage: track worker session IDs, current/completed/failed session IDs, and lifecycle events.
- Tool command registry: worker-visible command allowlists are explicit; Droid-style `services` is treated only as a legacy daemon-service alias.
## Live Now
- `model_auto_resolved`, `unit_selected`, `unit_completed`, and `unit_blocked` progress events are emitted into the existing journal stream alongside legacy events.
- `model-auto-resolved` UOK audit payloads include secret-safe route snapshots.
- UOK outcome memory recording uses the real `createMemory({ ... })` API and records source unit metadata.
- Focused tests cover the projection/accounting helpers and the UOK memory integration.
## Deliberately Deferred
- No DB migration yet. These are projections and contracts until real query/recovery needs prove first-class rows are worth it.
- No hard closeout gate yet. `assessAssertionCoverage()` exists, but completion semantics should change only with a focused gate test.
- No context-pack renderer yet. `buildWorkerContextPackProjection()` is a shape builder, not the prompt renderer.
- No command enforcement yet. `normalizeToolCommandRegistry()` constrains advertised tools/commands; execution enforcement must wire through existing permission/run-control paths.
- No new model router. SF's existing auto model selector remains the only route resolver.
## Next Integration Order
1. Persist worker lineage from real dispatch lifecycle events.
2. Add assertion coverage as a closeout gate before marking a unit complete.
3. Render worker context packs from existing SF DB/runtime state.
4. Feed the tool command registry into worker prompts.
5. Promote selected projection fields into SQLite only after dashboards/recovery flows need durable queries.
## Stop Rule
Do not add more schema helpers for this Droid accounting thread until an existing SF runtime path needs a concrete caller. The next work should be integration into existing UOK dispatch, closeout, prompt, or recovery flows.

View file

@ -0,0 +1,31 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { test } from "vitest";
// Read the interactive-mode implementation as raw text so the tests below can
// assert structural properties of the source (doc phrases, method signatures,
// call sites) without importing the module or starting a TUI.
const source = readFileSync(
  join(
    process.cwd(),
    "packages/pi-coding-agent/src/modes/interactive/interactive-mode.ts",
  ),
  "utf-8",
);
test("interactive_tui_autoreload_uses_existing_reload_path", () => {
  // The autoreload watcher must reuse the existing /reload path instead of
  // introducing a second reload mechanism.
  const requiredPatterns = [
    /Purpose: make the TUI pick up SF's own code\/resource fixes/,
    /private startAutoReloadWatcher\(\): void/,
    /private async checkAutoReload\(\): Promise<void>/,
    /await this\.handleReloadCommand\(\)/,
  ];
  for (const pattern of requiredPatterns) {
    assert.match(source, pattern);
  }
});
test("interactive_tui_runtime_reload_exits_for_launcher_restart", () => {
  const marker = "private async handleReloadCommand()";
  const reloadStart = source.indexOf(marker);
  assert.ok(reloadStart >= 0, "handleReloadCommand should exist");
  // Only the beginning of the method matters: the runtime-fingerprint check
  // and the launcher-restart exit must both appear near the top.
  const reloadBody = source.slice(reloadStart, reloadStart + 1200);
  for (const pattern of [
    /computeInteractiveRuntimeFingerprint\(\)/,
    /process\.exit\(INTERACTIVE_RELOAD_EXIT_CODE\)/,
  ]) {
    assert.match(reloadBody, pattern);
  }
});

View file

@ -15,11 +15,12 @@ function renderTool(
details?: Record<string, unknown>;
},
toolDefinition?: { label?: string },
options: { startedAt?: number } = {},
): string {
const component = new ToolExecutionComponent(
toolName,
args,
{},
options,
toolDefinition as any,
{ requestRender() {} } as any,
);
@ -138,4 +139,17 @@ describe("ToolExecutionComponent", () => {
assert.match(rendered, /Plan Milestone/);
assert.doesNotMatch(rendered, /sf_plan_milestone/);
});
test("tool frame header includes ISO minute timestamp", () => {
  // Month index 4 is May; this is 2026-05-08 20:51 in local time.
  const frameStart = new Date(2026, 4, 8, 20, 51, 0);
  const rendered = renderTool(
    "Read",
    { path: "/tmp/demo.txt" },
    undefined,
    undefined,
    { startedAt: frameStart.getTime() },
  );
  assert.match(rendered, /2026-05-08 20:51/);
});
});

View file

@ -196,7 +196,7 @@ export class AssistantMessageComponent extends Container {
}
}
if (message.stopReason && message.timestamp) {
if (!hasToolContent && message.stopReason && message.timestamp) {
const timeStr = formatTimestamp(
message.timestamp,
this.timestampFormat,

View file

@ -30,6 +30,7 @@ import { getLanguageFromPath, highlightCode, theme } from "../theme/theme.js";
import { shortenPath } from "../utils/shorten-path.js";
import { renderDiff } from "./diff.js";
import { keyHint } from "./keybinding-hints.js";
import { formatTimestamp } from "./timestamp.js";
import { truncateToVisualLines } from "./visual-truncate.js";
// Preview line limit for bash when not expanded
@ -111,6 +112,7 @@ function renderToolFrame(
status: string;
tone: ToolFrameTone;
modelLabel?: string;
startedAt?: number;
},
): string[] {
const outerWidth = Math.max(20, width);
@ -130,6 +132,9 @@ function renderToolFrame(
const leftStyled = theme.fg(labelColor, theme.bold(`${opts.label}`));
const statusStyled = theme.fg(statusColor, opts.status);
let rightStyled = statusStyled;
if (opts.startedAt !== undefined) {
rightStyled = `${theme.fg("dim", formatTimestamp(opts.startedAt, "date-time-iso"))}${theme.fg("dim", " · ")}${rightStyled}`;
}
if (opts.modelLabel) {
const separatorStyled = theme.fg("dim", " · ");
const modelWidth =
@ -215,6 +220,7 @@ function formatCompactArgs(args: unknown, expanded: boolean): string {
export interface ToolExecutionOptions {
showImages?: boolean; // default: true (only used if terminal supports images)
modelLabel?: string;
startedAt?: number;
}
type WriteHighlightCache = {
@ -238,6 +244,7 @@ export class ToolExecutionComponent extends Container {
private expanded = false;
private showImages: boolean;
private modelLabel?: string;
private startedAt: number;
private isPartial = true;
private toolDefinition?: ToolDefinition;
private ui: TUI;
@ -283,6 +290,7 @@ export class ToolExecutionComponent extends Container {
this.args = args;
this.showImages = options.showImages ?? true;
this.modelLabel = options.modelLabel?.trim() || undefined;
this.startedAt = options.startedAt ?? Date.now();
this.toolDefinition = toolDefinition;
this.ui = ui;
this.cwd = cwd;
@ -619,6 +627,7 @@ export class ToolExecutionComponent extends Container {
status: frameStatus,
tone: frameTone,
modelLabel: this.modelLabel,
startedAt: this.startedAt,
});
return framed.length > 0 ? ["", ...framed] : framed;
}

View file

@ -301,6 +301,7 @@ export async function handleAgentEvent(
{
showImages: host.settingsManager.getShowImages(),
modelLabel: modelLabelFromAssistant(host.streamingMessage),
startedAt: host.streamingMessage.timestamp,
},
host.getRegisteredToolDefinition(content.name),
host.ui,
@ -319,6 +320,7 @@ export async function handleAgentEvent(
{
showImages: host.settingsManager.getShowImages(),
modelLabel: modelLabelFromAssistant(host.streamingMessage),
startedAt: host.streamingMessage.timestamp,
},
undefined,
host.ui,
@ -767,6 +769,7 @@ export async function handleAgentEvent(
{
showImages: host.settingsManager.getShowImages(),
modelLabel: modelLabelFromHost(host),
startedAt: Date.now(),
},
host.getRegisteredToolDefinition(event.toolName),
host.ui,

View file

@ -198,6 +198,111 @@ function computeInteractiveRuntimeFingerprint(): string {
return hash.digest("hex").slice(0, 16);
}
// Polling period for the TUI autoreload watcher.
const AUTO_RELOAD_INTERVAL_MS = 2_500;
// File extensions that count as reload-relevant runtime resources.
const AUTO_RELOAD_RESOURCE_EXTENSIONS = new Set([
  ".cjs",
  ".js",
  ".json",
  ".md",
  ".mjs",
  ".ts",
  ".tsx",
  ".yaml",
  ".yml",
]);
// Directory names never descended into while scanning resource trees.
const AUTO_RELOAD_IGNORED_DIRS = new Set([
  ".git",
  "node_modules",
  "dist",
  "target",
]);
/**
 * Collect reload-relevant files under a runtime resource path.
 *
 * Purpose: let the TUI notice self-improvement edits to loaded extensions,
 * skills, prompts, and themes without asking the user to run `/reload`.
 * Missing or unreadable paths yield an empty result rather than throwing.
 *
 * Consumer: computeInteractiveResourceFingerprint() during the TUI autoreload
 * polling loop.
 */
function collectInteractiveResourceFiles(resourcePath: string): string[] {
  let rootStat: fs.Stats;
  try {
    rootStat = fs.statSync(resourcePath);
  } catch {
    // Path vanished or is unreadable; treat as "no resources".
    return [];
  }
  if (rootStat.isFile()) {
    const relevant = AUTO_RELOAD_RESOURCE_EXTENSIONS.has(
      path.extname(resourcePath),
    );
    return relevant ? [resourcePath] : [];
  }
  if (!rootStat.isDirectory()) return [];
  const matches: string[] = [];
  // Iterative traversal: drain a work stack of directories still to scan.
  const pending: string[] = [resourcePath];
  for (let dir = pending.pop(); dir; dir = pending.pop()) {
    let entries: fs.Dirent[];
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Directory disappeared mid-scan; skip it.
      continue;
    }
    for (const entry of entries) {
      const entryPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!AUTO_RELOAD_IGNORED_DIRS.has(entry.name)) pending.push(entryPath);
      } else if (
        entry.isFile() &&
        AUTO_RELOAD_RESOURCE_EXTENSIONS.has(path.extname(entry.name))
      ) {
        matches.push(entryPath);
      }
    }
  }
  return matches;
}
/**
 * Hash the loaded resource file set using paths, mtimes, and sizes.
 *
 * Purpose: detect changed extension/skill/prompt/theme resources cheaply enough
 * to poll in an interactive TUI, while avoiding expensive full-content hashing.
 * Files that disappear between listing and stat are folded in as "missing" so
 * deletions also change the fingerprint.
 *
 * Consumer: InteractiveMode.startAutoReloadWatcher().
 */
function computeInteractiveResourceFingerprint(
  resourcePaths: Iterable<string>,
): string {
  // Dedupe across overlapping resource roots, then sort for a stable digest.
  const uniqueFiles = new Set<string>();
  for (const resourcePath of resourcePaths) {
    for (const file of collectInteractiveResourceFiles(resourcePath)) {
      uniqueFiles.add(file);
    }
  }
  const hash = crypto.createHash("sha256");
  for (const file of [...uniqueFiles].sort()) {
    // Each record is NUL-delimited so adjacent fields cannot collide.
    let record: string;
    try {
      const stat = fs.statSync(file);
      record = `${file}\0${stat.mtimeMs}\0${stat.size}\0`;
    } catch {
      record = `${file}\0missing\0`;
    }
    hash.update(record);
  }
  return hash.digest("hex").slice(0, 16);
}
/**
* Options for InteractiveMode initialization.
*/
@ -246,6 +351,10 @@ export class InteractiveMode {
private isInitialized = false;
private readonly processRestartFingerprint =
computeInteractiveRuntimeFingerprint();
private resourceReloadFingerprint: string | undefined;
private autoReloadTimer: NodeJS.Timeout | undefined;
private autoReloadInProgress = false;
private autoReloadPendingReason: string | undefined;
private onInputCallback?: (text: string) => void;
private loadingAnimation: Loader | undefined = undefined;
private readonly defaultWorkingMessage = "Working...";
@ -665,6 +774,7 @@ export class InteractiveMode {
this._branchChangeUnsub = this.footerDataProvider.onBranchChange(() => {
this.ui.requestRender();
});
this.startAutoReloadWatcher();
// Initialize available provider count for footer display
await this.updateAvailableProviderCount();
@ -2638,7 +2748,11 @@ export class InteractiveMode {
const component = new ToolExecutionComponent(
content.name,
content.arguments,
{ showImages: this.settingsManager.getShowImages(), modelLabel },
{
showImages: this.settingsManager.getShowImages(),
modelLabel,
startedAt: message.timestamp,
},
this.getRegisteredToolDefinition(content.name),
this.ui,
);
@ -2671,7 +2785,11 @@ export class InteractiveMode {
const component = new ToolExecutionComponent(
content.name,
content.input ?? {},
{ showImages: this.settingsManager.getShowImages(), modelLabel },
{
showImages: this.settingsManager.getShowImages(),
modelLabel,
startedAt: message.timestamp,
},
undefined,
this.ui,
);
@ -3744,6 +3862,105 @@ export class InteractiveMode {
// Command handlers
// =========================================================================
/**
 * Start polling loaded runtime resources for self-improvement changes.
 *
 * Purpose: make the TUI pick up SF's own code/resource fixes as soon as it is
 * idle, rather than waiting for a human to type `/reload` after an agent edits
 * extensions, skills, prompts, themes, or restart-sensitive runtime modules.
 * Opt out with SF_TUI_AUTORELOAD=0; starting twice is a no-op.
 *
 * Consumer: initialize() after extension and branch watchers are installed.
 */
private startAutoReloadWatcher(): void {
  const disabled = process.env.SF_TUI_AUTORELOAD === "0";
  if (disabled || this.autoReloadTimer) return;
  this.refreshAutoReloadResourceFingerprint();
  const timer = setInterval(() => {
    void this.checkAutoReload();
  }, AUTO_RELOAD_INTERVAL_MS);
  // Never keep the process alive just for the watcher.
  timer.unref?.();
  this.autoReloadTimer = timer;
}
/**
 * Stop the TUI autoreload watcher.
 *
 * Purpose: avoid a background interval keeping the process alive or firing
 * after the UI has been stopped during shutdown/reload. Safe to call when the
 * watcher was never started.
 *
 * Consumer: stop().
 */
private stopAutoReloadWatcher(): void {
  const timer = this.autoReloadTimer;
  if (timer === undefined) return;
  this.autoReloadTimer = undefined;
  clearInterval(timer);
}
/**
 * Snapshot the currently loaded extension/skill/prompt/theme resources.
 *
 * Purpose: establish the post-load baseline used to tell whether a future
 * self-improvement changed the runtime resources that `/reload` would refresh.
 *
 * Consumer: startAutoReloadWatcher(), handleReloadCommand().
 */
private refreshAutoReloadResourceFingerprint(): void {
  const loadedPaths = this.session.resourceLoader.getPathMetadata().keys();
  this.resourceReloadFingerprint =
    computeInteractiveResourceFingerprint(loadedPaths);
}
/**
 * Check whether runtime or resource files changed and reload when idle.
 *
 * Purpose: turn self-improvement writes into an automatic reload/restart as
 * soon as the TUI can safely do it. If a change lands while the session is
 * streaming or compacting, the reason is parked and retried on a later tick.
 *
 * Consumer: startAutoReloadWatcher() interval callback.
 */
private async checkAutoReload(): Promise<void> {
  // Re-entrancy guard: a reload may span multiple poll intervals.
  if (this.autoReloadInProgress) return;
  const runtimeChanged =
    computeInteractiveRuntimeFingerprint() !== this.processRestartFingerprint;
  const currentResourceFingerprint = computeInteractiveResourceFingerprint(
    this.session.resourceLoader.getPathMetadata().keys(),
  );
  const baseline = this.resourceReloadFingerprint;
  const resourcesChanged =
    baseline !== undefined && currentResourceFingerprint !== baseline;
  const pendingReason = this.autoReloadPendingReason;
  if (!runtimeChanged && !resourcesChanged && pendingReason === undefined) {
    return;
  }
  // A previously parked reason wins so the eventual status message reflects
  // the change that originally triggered the reload.
  const reason =
    pendingReason ??
    (runtimeChanged ? "runtime changed on disk" : "resources changed on disk");
  if (this.session.isStreaming || this.session.isCompacting) {
    this.autoReloadPendingReason = reason;
    return;
  }
  this.autoReloadPendingReason = undefined;
  this.autoReloadInProgress = true;
  try {
    this.showStatus(`Auto-reload: ${reason}; reloading SF...`);
    this.ui.requestRender();
    await this.handleReloadCommand();
    this.refreshAutoReloadResourceFingerprint();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    this.showWarning(`Auto-reload failed: ${message}`);
  } finally {
    this.autoReloadInProgress = false;
  }
}
private async handleReloadCommand(): Promise<void> {
if (this.session.isStreaming) {
this.showWarning(
@ -3791,6 +4008,7 @@ export class InteractiveMode {
try {
await this.session.reload();
this.refreshAutoReloadResourceFingerprint();
setRegisteredThemes(this.session.resourceLoader.getThemes().themes);
this.hideThinkingBlock = this.settingsManager.getHideThinkingBlock();
const themeName = this.settingsManager.getTheme();
@ -4094,6 +4312,7 @@ export class InteractiveMode {
// Clean up branch change listener (Fix 1)
this._branchChangeUnsub?.();
this._branchChangeUnsub = undefined;
this.stopAutoReloadWatcher();
// Clean up theme change listener and watcher (Fix 2)
onThemeChange(() => {});

View file

@ -273,6 +273,18 @@ export class TUI extends Container {
private stopped = false;
private _lastRenderedComponents: string[] | null = null;
// === Sticky bottom scrolling ===
private isScrolledToBottom = true; // Track if user is scrolled to bottom
// === Autonomous mode info bar ===
public autonomousStatus?: {
currentSlice?: string;
sliceStatus?: string;
progress?: number;
totalTasks?: number;
completedTasks?: number;
};
// Overlay stack for modal components rendered on top of base content
private focusOrderCounter = 0;
private overlayStack: {
@ -547,6 +559,101 @@ export class TUI extends Container {
});
}
/**
 * Whether the viewport currently includes the final line of rendered content.
 */
private isAtBottom(): boolean {
  const viewportHeight = this.terminal.rows;
  const viewportTop = Math.max(0, this.maxLinesRendered - viewportHeight);
  return viewportTop + viewportHeight >= this.previousLines.length;
}
/**
 * Scroll so the last content line is visible (sticky bottom).
 *
 * Moves the hardware cursor to the final content row via relative cursor
 * escapes, then records the new viewport top and re-arms sticky tracking.
 * A no-op when all content already fits in the viewport.
 */
private scrollToBottom(): void {
  const viewportHeight = this.terminal.rows;
  const contentHeight = this.previousLines.length;
  if (contentHeight <= viewportHeight) return;
  const targetScreenRow = contentHeight - 1;
  const currentScreenRow = this.hardwareCursorRow - this.previousViewportTop;
  const delta = targetScreenRow - currentScreenRow;
  if (delta !== 0) {
    // CSI n B moves down n rows, CSI n A moves up n rows.
    const direction = delta > 0 ? "B" : "A";
    this.terminal.write(`\x1b[${Math.abs(delta)}${direction}`);
  }
  this.previousViewportTop = Math.max(0, contentHeight - viewportHeight);
  this.isScrolledToBottom = true;
}
/**
 * Replace the autonomous-mode status snapshot and schedule a re-render so the
 * info bar reflects it on the next frame.
 */
updateAutonomousStatus(status: {
  currentSlice?: string;
  sliceStatus?: string;
  progress?: number;
  totalTasks?: number;
  completedTasks?: number;
}): void {
  this.autonomousStatus = status;
  this.requestRender();
}
/**
 * Render the one-line autonomous-mode info bar.
 *
 * Returns an empty array when no autonomous status has been set; otherwise a
 * single ANSI-colored line showing whichever of slice, status, progress, and
 * task counts are present.
 */
private renderAutonomousStatusBar(width: number): string[] {
  const status = this.autonomousStatus;
  if (!status) return [];
  const segments: string[] = ["\x1b[90m│ AUTONOMOUS MODE "];
  if (status.currentSlice) {
    segments.push(`\x1b[97mSlice: \x1b[96m${status.currentSlice} `);
  }
  if (status.sliceStatus) {
    segments.push(`\x1b[97mStatus: \x1b[92m${status.sliceStatus} `);
  }
  if (status.progress !== undefined) {
    // Reserve ~30 columns for the surrounding labels.
    const progressBar = this.createProgressBar(status.progress, width - 30);
    segments.push(`\x1b[97mProgress: \x1b[93m${progressBar} `);
  }
  if (status.totalTasks !== undefined && status.completedTasks !== undefined) {
    segments.push(
      `\x1b[97mTasks: \x1b[95m${status.completedTasks}/${status.totalTasks} `,
    );
  }
  segments.push("\x1b[90m│\x1b[0m");
  return [segments.join("")];
}
/**
 * Build a fixed-width ASCII progress bar like `[███░░] 60%`.
 *
 * Bar width is clamped to [5, 20] regardless of available columns; `progress`
 * is a percentage in [0, 100].
 */
private createProgressBar(progress: number, width: number): string {
  const barWidth = Math.min(20, Math.max(5, width));
  const filledCount = Math.floor((progress / 100) * barWidth);
  const filledPart = "█".repeat(filledCount);
  const emptyPart = "░".repeat(barWidth - filledCount);
  return `[${filledPart}${emptyPart}] ${progress}%`;
}
private handleInput(data: string): void {
if (this.inputListeners.size > 0) {
let current = data;
@ -579,6 +686,14 @@ export class TUI extends Container {
return;
}
// Detect scrolling keys (Page Up/Down, arrow keys) to break sticky bottom
if (
this.isScrolledToBottom &&
(matchesKey(data, "pageUp") || matchesKey(data, "up"))
) {
this.isScrolledToBottom = false;
}
// If focused component is an overlay, verify it's still visible
// (visibility can change due to terminal resize or visible() callback)
const focusedOverlay = this.overlayStack.find(
@ -595,6 +710,17 @@ export class TUI extends Container {
}
}
// Enter key scrolling behavior: if not at bottom, scroll down instead of sending input
if (data === "\r" || data === "\n") {
// Enter key
if (!this.isAtBottom()) {
// Scroll down one page or to bottom
this.scrollToBottom();
return;
}
// If we're at bottom, let Enter pass through to focused component
}
// Pass input to focused component (including Ctrl+C)
// The focused component can decide how to handle Ctrl+C
if (this.focusedComponent?.handleInput) {
@ -674,11 +800,22 @@ export class TUI extends Container {
// Render all components to get new lines
let newLines = this.render(width);
// Add autonomous status bar at the top if in autonomous mode
const statusBarLines = this.renderAutonomousStatusBar(width);
if (statusBarLines.length > 0) {
newLines = [...statusBarLines, ...newLines];
}
// Check if content grew and we should scroll to bottom (sticky bottom behavior)
const contentGrew = newLines.length > this.previousLines.length;
const shouldScrollToBottom = contentGrew && this.isScrolledToBottom;
// Skip ALL post-processing if component output is unchanged.
// Container.render() returns the same array reference when stable.
if (
newLines === this._lastRenderedComponents &&
this.overlayStack.length === 0
this.overlayStack.length === 0 &&
!shouldScrollToBottom
) {
return;
}
@ -997,6 +1134,11 @@ export class TUI extends Container {
this.maxLinesRendered = Math.max(this.maxLinesRendered, newLines.length);
this.previousViewportTop = Math.max(0, this.maxLinesRendered - height);
// Apply sticky bottom behavior if content grew and user was at bottom
if (shouldScrollToBottom) {
this.scrollToBottom();
}
// Position hardware cursor for IME
this.positionHardwareCursor(cursorPos, newLines.length);

View file

@ -194,7 +194,7 @@ export function buildAutoBootstrapContext(basePath: string): string {
"SF headless autonomous found no milestones. Use the repository files below as the seed context.",
"Research SF working specs first, then every relevant markdown document and every source file path before creating the initial milestone plan.",
"Use tool-based repository inspection for source contents; do not assume the seed excerpt is complete.",
"Treat .sf/PROJECT.md, .sf/REQUIREMENTS.md, .sf/DECISIONS.md, .sf/KNOWLEDGE.md, and .sf/RUNTIME.md as SF's canonical working spec/state docs when present.",
"Treat .sf/PROJECT.md, .sf/REQUIREMENTS.md, .sf/DECISIONS.md, .sf/KNOWLEDGE.md, and .sf/RUNTIME.md as review/export or recovery surfaces when present; `.sf/sf.db` remains the canonical structured runtime state.",
"Treat any root-level SPEC.md, BASE_SPEC.md, PRODUCT_SPEC.md, docs/specs files, or other docs as repo evidence for humans. Project facts SF needs later into SF's .sf working model and DB-backed state; do not create a parallel base-spec system.",
"For product-facing or workflow-facing work, research the product category and representative competitors before locking requirements or slices. Capture table stakes, differentiators, common failure modes, and what not to copy.",
"Extract the project purpose, vision, architecture, constraints, current TODOs, risks, eval/gate ideas, and implementation backlog.",

View file

@ -8,20 +8,20 @@
* Consumer: Every module in src/ and packages/ that needs application logging.
*/
import { mkdirSync } from "node:fs";
import { join } from "node:path";
import { getRotatingFileSink } from "@logtape/file";
import {
configure,
reset,
getLogger as logtapeGetLogger,
getConsoleSink,
getJsonLinesFormatter,
type LogRecord,
getLogger as logtapeGetLogger,
reset,
type Sink,
} from "@logtape/logtape";
import { getPrettyFormatter } from "@logtape/pretty";
import { getRotatingFileSink } from "@logtape/file";
import { redactByPattern, redactByField } from "@logtape/redaction";
import { mkdirSync } from "node:fs";
import { join } from "node:path";
import { redactByField, redactByPattern } from "@logtape/redaction";
export interface LoggerOptions {
/** Session identifier for per-session log directories. */
@ -108,7 +108,9 @@ function buildRedactingSink(
*
* Consumer: src/cli.ts early in startup, and test suites.
*/
export async function configureLogger(options: LoggerOptions = {}): Promise<void> {
export async function configureLogger(
options: LoggerOptions = {},
): Promise<void> {
if (configured) {
return;
}
@ -204,12 +206,17 @@ export async function configureLogger(options: LoggerOptions = {}): Promise<void
*
* Consumer: Every migrated module calls `const log = getLogger("sf.core.env")`.
*/
export function getLogger(category: string): ReturnType<typeof logtapeGetLogger> {
export function getLogger(
category: string,
): ReturnType<typeof logtapeGetLogger> {
return logtapeGetLogger(category.split("."));
}
function inferMode(): "dev" | "autonomous" {
if (process.env.SF_AUTONOMOUS === "1" || process.env.NODE_ENV === "production") {
if (
process.env.SF_AUTONOMOUS === "1" ||
process.env.NODE_ENV === "production"
) {
return "autonomous";
}
return "dev";

View file

@ -7,6 +7,7 @@
* - Redacts secrets from tool results before the LLM sees them
* - Blocks dangerous bash commands (rm -rf, sudo, mkfs, etc.)
* - Blocks writes to protected paths (.env, .git, .ssh, etc.)
* - Registers SF slash commands: /safegit, /safegit-level, /safegit-status, /yolo
*/
import * as path from "node:path";
@ -222,7 +223,7 @@ const GIT_PATTERNS = [
},
// Medium risk
{ pattern: /\bgit\s+push\b/i, action: "push", severity: "medium" },
{ pattern: /\bgit\s+commit\b/i, action: "commit", severity: "medium" },
// git commit removed - commits are now allowed without prompting
{ pattern: /\bgit\s+rebase\b/i, action: "rebase", severity: "medium" },
{ pattern: /\bgit\s+merge\b/i, action: "merge", severity: "medium" },
{
@ -414,6 +415,15 @@ async function checkGitCommand(
}
return undefined;
}
/**
* Register SF slash commands for session-local safe-git controls.
*
* Purpose: expose Git guardrail controls through the SF command surface rather
* than as shell binaries, so users can type `/safegit` in chat/TUI and update
* the current session's Git approval policy.
*
* Consumer: guardrails(pi) during extension registration.
*/
function registerSafeGitCommands(
pi,
sessionEnabledOverride,
@ -421,7 +431,8 @@ function registerSafeGitCommands(
yoloPreviousPromptLevel,
) {
pi.registerCommand("safegit", {
description: "Toggle safe-git protection on/off for this session",
description:
"Slash command /safegit: toggle safe-git protection for this session",
handler: async (_, ctx) => {
const { enabled } = getSafeGitConfig(
ctx,
@ -439,7 +450,8 @@ function registerSafeGitCommands(
},
});
pi.registerCommand("safegit-level", {
description: "Set prompt level: high, medium, or none",
description:
"Slash command /safegit-level: set prompt level to high, medium, or none",
handler: async (args, ctx) => {
const arg = typeof args === "string" ? args.trim().toLowerCase() : "";
if (arg === "high" || arg === "medium" || arg === "none") {
@ -479,7 +491,8 @@ function registerSafeGitCommands(
},
});
pi.registerCommand("yolo", {
description: "Toggle session-only safe-git prompt bypass",
description:
"Slash command /yolo: toggle session-only safe-git prompt bypass",
handler: async (_, ctx) => {
const { promptLevel } = getSafeGitConfig(
ctx,
@ -506,7 +519,8 @@ function registerSafeGitCommands(
},
});
pi.registerCommand("safegit-status", {
description: "Show safe-git status and settings",
description:
"Slash command /safegit-status: show safe-git status and settings",
handler: async (_, ctx) => {
const settings = ctx.settingsManager?.getSettings() ?? {};
const globalConfig = {

View file

@ -87,11 +87,13 @@ import { handleProductAudit } from "../tools/product-audit-tool.js";
import { parseUnitId } from "../unit-id.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
import { emitModelAutoResolvedEvent } from "../uok/model-route-evidence.js";
import {
ensurePlanV2Graph as ensurePlanningFlowGraph,
isEmptyPlanV2GraphResult,
isMissingFinalizedContextResult,
} from "../uok/plan-v2.js";
import { buildUokProgressEvent } from "../uok/progress-event.js";
import {
clearUnitRuntimeRecord,
writeUnitRuntimeRecord,
@ -1875,6 +1877,25 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
eventType: "unit-start",
data: { unitType, unitId },
});
{
const progressEvent = buildUokProgressEvent({
eventType: "unit_selected",
unitType,
unitId,
role: "worker",
sessionId: ctx.sessionManager.getSessionId(),
traceId: ic.flowId,
data: { legacyEventType: "unit-start" },
});
deps.emitJournalEvent({
ts: progressEvent.ts,
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: progressEvent.eventType,
data: progressEvent,
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
ctx.ui.notify(`[unit] ${unitType} ${unitId} starting`, "info");
deps.captureAvailableSkills();
writeUnitRuntimeRecord(
@ -1890,6 +1911,11 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
progressCount: 0,
lastProgressKind: "dispatch",
recoveryAttempts: 0, // Reset so re-dispatched units get full recovery budget (#2322)
lineageEvent: {
status: "started",
workerSessionId: ctx.sessionManager.getSessionId(),
note: "unit dispatched",
},
},
);
// Status bar (widget + preconditions deferred until after model selection — see #2899)
@ -2125,6 +2151,48 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
s.currentDispatchedModelId = s.currentUnitModel
? `${s.currentUnitModel.provider ?? ""}/${s.currentUnitModel.id ?? ""}`
: null;
emitModelAutoResolvedEvent(s.basePath, {
traceId: `model:${ctx.sessionManager.getSessionId()}:${unitType}:${unitId}`,
unitType,
unitId,
resolvedModel: s.currentUnitModel ?? ctx.model ?? null,
authMode:
(s.currentUnitModel?.provider ?? ctx.model?.provider)
? ctx.modelRegistry.getProviderAuthMode(
s.currentUnitModel?.provider ?? ctx.model.provider,
)
: undefined,
routingReason: hookModelOverride
? `hook override: ${hookModelOverride}`
: "auto selector",
routing: s.currentUnitRouting,
hookOverrideApplied: Boolean(hookModelOverride),
tokenUsage: collectSessionTokenUsage?.(ctx),
});
{
const progressEvent = buildUokProgressEvent({
eventType: "model_auto_resolved",
unitType,
unitId,
role: "worker",
sessionId: ctx.sessionManager.getSessionId(),
traceId: ic.flowId,
data: {
resolvedProvider: s.currentUnitModel?.provider ?? ctx.model?.provider,
resolvedModel: s.currentUnitModel?.id ?? ctx.model?.id,
routing: s.currentUnitRouting,
hookOverrideApplied: Boolean(hookModelOverride),
},
});
deps.emitJournalEvent({
ts: progressEvent.ts,
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: progressEvent.eventType,
data: progressEvent,
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
const compatibilityError = getWorkflowTransportSupportError(
s.currentUnitModel?.provider ?? ctx.model?.provider,
getRequiredWorkflowToolsForAutoUnit(unitType),
@ -2724,6 +2792,75 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
},
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
if (
currentUnitResult.status === "completed" ||
currentUnitResult.status === "blocked"
) {
const progressEvent = buildUokProgressEvent({
eventType:
currentUnitResult.status === "completed"
? "unit_completed"
: "unit_blocked",
unitType,
unitId,
role: "worker",
sessionId: ctx.sessionManager.getSessionId(),
traceId: ic.flowId,
data: {
status: currentUnitResult.status,
artifactVerified,
legacyEventType: "unit-end",
...(unitEndEntry
? {
cost_usd: unitEndEntry.cost,
tokens: unitEndEntry.tokens.total,
}
: {}),
},
});
deps.emitJournalEvent({
ts: progressEvent.ts,
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: progressEvent.eventType,
data: progressEvent,
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
}
{
const runtimeStatus =
currentUnitResult.status === "completed"
? artifactVerified
? "completed"
: "blocked"
: currentUnitResult.status === "error"
? "failed"
: currentUnitResult.status;
const lineageStatus =
runtimeStatus === "completed"
? "completed"
: runtimeStatus === "blocked"
? "blocked"
: runtimeStatus === "cancelled"
? "cancelled"
: "failed";
writeUnitRuntimeRecord(
s.basePath,
unitType,
unitId,
s.currentUnit?.startedAt ?? Date.now(),
{
status: runtimeStatus,
lastProgressAt: Date.now(),
lastProgressKind: "unit-end",
lineageEvent: {
status: lineageStatus,
workerSessionId: ctx.sessionManager.getSessionId(),
note: `unit ended with ${currentUnitResult.status}`,
},
},
);
}
{
const verdict =

View file

@ -663,7 +663,7 @@ export function buildAutonomousSolverMissingCheckpointRepairPrompt(
"",
"**Low-confidence reconstruction guidance**:",
"- Use outcome='decide' when evidence is sparse or ambiguous (confidence < 0.98)",
"- Use outcome='decide' when you cannot verify what work was actually completed",
"- Use outcome='decide' when you cannot verify what work was actually completed",
"- Use outcome='decide' when there are multiple possible interpretations of progress",
"- This ensures autonomous mode pauses for human acceptance rather than guessing incorrectly",
);

View file

@ -63,6 +63,7 @@ import {
recordToolResult as safetyRecordToolResult,
saveEvidenceToDisk,
} from "../safety/evidence-collector.js";
import { initSessionRecorder } from "../session-recorder.js";
import { deriveState } from "../state.js";
import { countGoogleGeminiCliTokens } from "../token-counter.js";
import { parseUnitId } from "../unit-id.js";
@ -163,6 +164,10 @@ export function registerHooks(pi, ecosystemHandlers = []) {
if (sid) {
process.stderr.write(`[forge] session ${sid.slice(0, 8)} · ${sfile}\n`);
}
// Establish the session row so all subsequent turns have a parent.
// Git context (repo, branch) is patched in before_agent_start once the
// DB is open and the cwd is confirmed.
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
} catch {
/* non-fatal */
}
@ -176,6 +181,24 @@ export function registerHooks(pi, ecosystemHandlers = []) {
await syncServiceTierStatus(ctx);
await initializeLearningRuntime();
await runSessionStartupDoctorFix(ctx);
// Initialize metrics-central with database adapter
try {
const { initMetricsCentral } = await import("../metrics-central.js");
const { getDatabase } = await import("../sf-db.js");
const dbAdapter = getDatabase();
const sessionId = ctx.sessionManager?.getSessionId?.() || "";
initMetricsCentral(process.cwd(), {
sessionId,
dbAdapter,
});
} catch (err) {
// Non-fatal: metrics should not block session start
const { logWarning } = await import("../workflow-logger.js");
logWarning(
"session-start",
`Failed to initialize metrics-central: ${err instanceof Error ? err.message : String(err)}`,
);
}
// Apply show_token_cost preference (#1515)
try {
const { loadEffectiveSFPreferences } = await import("../preferences.js");
@ -373,6 +396,15 @@ export function registerHooks(pi, ecosystemHandlers = []) {
await syncServiceTierStatus(ctx);
await initializeLearningRuntime();
loadToolApiKeys();
// Re-establish session recorder for the new session so turn recording
// continues under the correct session_id without contaminating the
// previous session's rows.
try {
const sid = ctx.sessionManager?.getSessionId?.() ?? "";
initSessionRecorder(sid, { mode: "interactive", cwd: process.cwd() });
} catch {
/* non-fatal */
}
});
pi.on("before_agent_start", async (event, ctx) => {
// Refresh the ecosystem snapshot BEFORE running ecosystem handlers so they
@ -383,9 +415,45 @@ export function registerHooks(pi, ecosystemHandlers = []) {
const basePath = process.cwd();
const state = await deriveState(basePath);
updateSnapshot(state);
// Patch git context on the first turn now that the DB is confirmed open.
// Best-effort: git may be absent (e.g. tmp dirs), so we swallow errors.
try {
const { execFileSync } = await import("node:child_process");
const branch = execFileSync(
"git",
["rev-parse", "--abbrev-ref", "HEAD"],
{
cwd: basePath,
encoding: "utf-8",
stdio: ["ignore", "pipe", "ignore"],
timeout: 3_000,
},
).trim();
const remoteUrl = execFileSync(
"git",
["config", "--get", "remote.origin.url"],
{
cwd: basePath,
encoding: "utf-8",
stdio: ["ignore", "pipe", "ignore"],
timeout: 3_000,
},
).trim();
patchSessionGitContext(remoteUrl || null, branch || null);
} catch {
/* non-fatal: git absent or not a repo */
}
} catch {
updateSnapshot(null);
}
// Record user message as the start of a new turn. Done after ensureDbOpen
// so the turns row lands in the DB immediately; agent_end will patch the
// assistant_response onto it once the model finishes.
try {
recordTurnStart(typeof event.prompt === "string" ? event.prompt : null);
} catch {
/* non-fatal: turn recording must never block the agent */
}
// Await ecosystem loading, then dispatch any registered handlers.
await getEcosystemReadyPromise();
for (const handler of ecosystemHandlers) {
@ -401,6 +469,19 @@ export function registerHooks(pi, ecosystemHandlers = []) {
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
await handleAgentEnd(pi, event, ctx);
// Complete the pending turn row with the assistant's text response.
// event.messages is an array; the last entry is the model's reply.
// Its .content is an array of content blocks — extract the first text block.
try {
const msgs = Array.isArray(event.messages) ? event.messages : [];
const lastMsg = msgs[msgs.length - 1];
const textBlock = Array.isArray(lastMsg?.content)
? lastMsg.content.find((b) => b.type === "text" && b.text)
: null;
recordTurnEnd(textBlock?.text ?? null);
} catch {
/* non-fatal: turn recording must never block agent teardown */
}
// Best-effort embedding backfill: when SF_LLM_GATEWAY_KEY is set and the
// gateway has an embed worker online, embed any memories that don't yet
// have a vector. Bounded per invocation; logs once-per-minute when the
@ -538,7 +619,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
// Return custom compaction summary that preserves work state
// instead of cancelling compaction
return {
const result = {
compaction: {
summary:
workState.length > 0
@ -553,9 +634,31 @@ export function registerHooks(pi, ecosystemHandlers = []) {
},
},
};
// Persist compaction summary as the session's most recent work description
// so memory-pipeline ingestion has a compact semantic handle for retrieval.
try {
updateSessionSummary(result.compaction.summary);
} catch {
/* non-fatal */
}
return result;
});
pi.on("session_shutdown", async (_event, ctx) => {
// Flush any in-flight turn (e.g. interrupted agent) and clear session state
// so the recorder doesn't carry stale IDs into a subsequent process reuse.
try {
resetSessionRecorder();
} catch {
/* non-fatal */
}
resetLearningRuntime();
// Stop metrics-central on session shutdown
try {
const { stopMetricsCentral } = await import("../metrics-central.js");
stopMetricsCentral();
} catch {
// Non-fatal: cleanup should not block shutdown
}
if (isParallelActive()) {
try {
await shutdownParallel(process.cwd());
@ -581,6 +684,14 @@ export function registerHooks(pi, ecosystemHandlers = []) {
if (loopCheck.block) {
return { block: true, reason: loopCheck.reason };
}
// ── Session file-touch recording ──────────────────────────────────────
// Best-effort: path may be absent for non-file tools; recordFileTouch
// no-ops on non-write tools and when no session is active.
try {
recordFileTouch(event.toolName, event.input?.path ?? null);
} catch {
/* non-fatal */
}
// ── Research unit terminal transition enforcement ─────────────────────
// After a research unit (research-slice/research-milestone) successfully
// saves its RESEARCH artifact via sf_summary_save, the tool returns
@ -961,6 +1072,22 @@ export function registerHooks(pi, ecosystemHandlers = []) {
});
pi.on("tool_execution_end", async (event) => {
markToolEnd(event.toolCallId);
// Record tool execution performance metrics
try {
const { recordToolExecution } = await import("../metrics-central.js");
recordToolExecution(
event.toolName,
event.durationMs,
event.isError,
event.isError
? typeof event.result === "string"
? event.result
: "tool_error"
: undefined,
);
} catch {
// Non-fatal: metrics should not break tool execution
}
// #2883/#4974: Capture deterministic invocation/policy errors so
// postUnitPreVerification can break the retry loop instead of re-dispatching.
// Covers sf_ tool JSON errors AND write-gate blocks on write/edit/bash tools.
@ -1040,6 +1167,7 @@ export function registerHooks(pi, ecosystemHandlers = []) {
return { messages };
});
pi.on("before_provider_request", async (event, ctx) => {
const modelId = event.model?.id;
const payload = event.payload;
if (!payload || typeof payload !== "object") return;
applyCompletionNudgeTemperature(payload);
@ -1101,7 +1229,6 @@ export function registerHooks(pi, ecosystemHandlers = []) {
}
}
// ── Service Tier ────────────────────────────────────────────────────
const modelId = event.model?.id;
if (!modelId) {
ctx.ui.setStatus("sf-gemini-tokens", undefined);
return payload;

View file

@ -474,10 +474,10 @@ function readMemoryDbStatus(adapter) {
async function probeEmbedding(gatewayConfig, createGatewayEmbedFn) {
const startedAt = Date.now();
try {
const embedFn = createGatewayEmbedFn({
...gatewayConfig,
timeoutMs: 10_000,
});
const embedFn = createGatewayEmbedFn(
{ ...gatewayConfig, timeoutMs: 10_000 },
{ instruction: gatewayConfig.queryInstruction },
);
const vectors = await embedFn(["sf memory status embedding probe"]);
const dim = vectors[0]?.length ?? 0;
if (dim <= 0) {

View file

@ -356,15 +356,35 @@ async function listItems(args, ctx) {
}
async function markDone(args, ctx) {
const idPrefix = _joinPlain(_splitArgs(args));
const parts = _splitArgs(args);
let idPrefix = "";
let scope = "project";
for (let i = 0; i < parts.length; i++) {
const p = parts[i];
if (p === "--scope" || p === "-s") {
scope = parts[++i];
continue;
}
if (!idPrefix) {
idPrefix = p;
}
}
if (!idPrefix) {
ctx.ui.notify("Usage: /schedule done \u003cid\u003e", "warning");
ctx.ui.notify("Usage: /schedule done [--scope <scope>] <id>", "warning");
return;
}
if (scope !== "project" && scope !== "global") {
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
return;
}
const store = createScheduleStore(_basePath());
const { entry } = _findEntry(store, "project", idPrefix);
const { entry } = _findEntry(store, scope, idPrefix);
if (!entry) {
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
return;
}
const updated = {
@ -372,20 +392,40 @@ async function markDone(args, ctx) {
status: "done",
created_at: new Date().toISOString(),
};
store.appendEntry("project", updated);
store.appendEntry(scope, updated);
ctx.ui.notify(`Marked done: ${entry.id}`, "success");
}
async function markCancel(args, ctx) {
const idPrefix = _joinPlain(_splitArgs(args));
const parts = _splitArgs(args);
let idPrefix = "";
let scope = "project";
for (let i = 0; i < parts.length; i++) {
const p = parts[i];
if (p === "--scope" || p === "-s") {
scope = parts[++i];
continue;
}
if (!idPrefix) {
idPrefix = p;
}
}
if (!idPrefix) {
ctx.ui.notify("Usage: /schedule cancel \u003cid\u003e", "warning");
ctx.ui.notify("Usage: /schedule cancel [--scope <scope>] <id>", "warning");
return;
}
if (scope !== "project" && scope !== "global") {
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
return;
}
const store = createScheduleStore(_basePath());
const { entry } = _findEntry(store, "project", idPrefix);
const { entry } = _findEntry(store, scope, idPrefix);
if (!entry) {
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
return;
}
const updated = {
@ -393,7 +433,7 @@ async function markCancel(args, ctx) {
status: "cancelled",
created_at: new Date().toISOString(),
};
store.appendEntry("project", updated);
store.appendEntry(scope, updated);
ctx.ui.notify(`Cancelled: ${entry.id}`, "success");
}
@ -401,10 +441,13 @@ async function snoozeItem(args, ctx) {
const parts = _splitArgs(args);
let idPrefix = "";
let by = "";
let scope = "project";
for (let i = 0; i < parts.length; i++) {
if (parts[i] === "--by" || parts[i] === "-b") {
by = parts[++i];
} else if (parts[i] === "--scope" || parts[i] === "-s") {
scope = parts[++i];
} else if (!idPrefix) {
idPrefix = parts[i];
}
@ -412,16 +455,21 @@ async function snoozeItem(args, ctx) {
if (!idPrefix || !by) {
ctx.ui.notify(
"Usage: /schedule snooze \u003cid\u003e --by \u003cduration\u003e",
"Usage: /schedule snooze [--scope <scope>] <id> --by <duration>",
"warning",
);
return;
}
if (scope !== "project" && scope !== "global") {
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
return;
}
const store = createScheduleStore(_basePath());
const { entry } = _findEntry(store, "project", idPrefix);
const { entry } = _findEntry(store, scope, idPrefix);
if (!entry) {
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
return;
}
@ -444,29 +492,43 @@ async function snoozeItem(args, ctx) {
created_at: now,
snoozed_at: now,
};
store.appendEntry("project", updated);
store.appendEntry(scope, updated);
ctx.ui.notify(`Snoozed: ${entry.id}\nNew due: ${newDue}`, "success");
}
async function runItem(args, ctx) {
const parts = _splitArgs(args);
let idPrefix = "";
let scope = "project";
let dryRun = false;
for (const part of parts) {
if (part === "--dry-run" || part === "--dry") {
dryRun = true;
continue;
} else if (part === "--scope" || part === "-s") {
scope = parts[parts.indexOf(part) + 1];
} else if (!idPrefix) {
idPrefix = part;
}
if (!idPrefix) idPrefix = part;
}
if (!idPrefix) {
ctx.ui.notify("Usage: /schedule run [--dry-run] \u003cid\u003e", "warning");
ctx.ui.notify(
"Usage: /schedule run [--scope <scope>] [--dry-run] <id>",
"warning",
);
return;
}
if (scope !== "project" && scope !== "global") {
ctx.ui.notify(`Unknown scope: ${scope}. Valid: project, global`, "warning");
return;
}
const store = createScheduleStore(_basePath());
const { entry } = _findEntry(store, "project", idPrefix);
const { entry } = _findEntry(store, scope, idPrefix);
if (!entry) {
ctx.ui.notify(`Item ${idPrefix} not found in project scope.`, "warning");
ctx.ui.notify(`Item ${idPrefix} not found in ${scope} scope.`, "warning");
return;
}
@ -490,7 +552,8 @@ async function runItem(args, ctx) {
id: entry.id,
kind: entry.kind,
status: entry.status,
cwd: _basePath(),
scope: scope,
cwd: scope === "project" ? _basePath() : undefined,
command,
autonomous_dispatch: entry.autonomous_dispatch === true,
would_execute: typeof command === "string" && command.length > 0,
@ -502,6 +565,18 @@ async function runItem(args, ctx) {
);
return;
}
// Global scope commands cannot execute (no repo context)
if (scope === "global") {
ctx.ui.notify(
`Cannot execute global scope command: ${entry.id}\n` +
`Global commands can only be run in project context.\n` +
`Use project scope for executable commands.`,
"warning",
);
return;
}
const result = executeProjectScheduleCommand(_basePath(), entry);
if (!result.ok) {
ctx.ui.notify(`Command failed: ${result.reason}`, "error");
@ -566,10 +641,10 @@ export async function handleSchedule(args, ctx) {
"Usage: /schedule add|list|done|cancel|snooze|run\n" +
" add --in \u003cduration\u003e [--kind \u003ckind\u003e] [--scope \u003cscope\u003e] [--autonomous-dispatch] \u003ctitle-or-command\u003e\n" +
" list [--due] [--all] [--json] [--scope \u003cscope\u003e]\n" +
" done \u003cid\u003e\n" +
" cancel \u003cid\u003e\n" +
" snooze \u003cid\u003e --by \u003cduration\u003e\n" +
" run [--dry-run] \u003cid\u003e",
" done [--scope \u003cscope\u003e] \u003cid\u003e\n" +
" cancel [--scope \u003cscope\u003e] \u003cid\u003e\n" +
" snooze [--scope \u003cscope\u003e] \u003cid\u003e --by \u003cduration\u003e\n" +
" run [--scope \u003cscope\u003e] [--dry-run] \u003cid\u003e",
"info",
);
return;

View file

@ -726,19 +726,27 @@ export async function handleCoreCommand(trimmed, ctx, pi) {
return true;
}
// Normal list mode
const { loadSkills, getPermittedSkills, getModelInvocableSkills } =
await import("../../skills/loader.js");
const skills = loadSkills(projectRoot());
const {
loadSkills,
getPermittedSkills,
getModelInvocableSkills,
getUserInvocableSkills,
} = await import("../../skills/loader.js");
const skills = loadSkills(projectRoot(), { includeBundled: true });
const visibleSkills = getUserInvocableSkills(skills);
const mode = getAutoSession().getMode();
const permitted = getPermittedSkills(skills, mode.permissionProfile);
const modelInvocable = getModelInvocableSkills(skills, mode.workMode);
const permitted = getPermittedSkills(visibleSkills, mode.permissionProfile);
const modelInvocable = getModelInvocableSkills(
visibleSkills,
mode.workMode,
);
const lines = ["SF Skills\n"];
lines.push(
`Found ${skills.length} skill(s) · ${permitted.length} permitted · ${modelInvocable.length} model-invocable\n`,
`Found ${visibleSkills.length} user-invocable skill(s) · ${permitted.length} permitted · ${modelInvocable.length} model-invocable\n`,
);
for (const skill of skills) {
for (const skill of visibleSkills) {
const icon = skill.valid ? "✓" : "✗";
const user = skill.userInvocable ? "U" : " ";
const model = skill.modelInvocable ? "M" : " ";

View file

@ -26,12 +26,56 @@ function logRerankUnavailable(msg) {
lastRerankUnavailableLogAt = now;
logWarning("memory-embeddings", msg);
}
// Circuit breaker for the embed path. When the remote gateway is unreachable
// (network timeout, cold-start stall), each call would otherwise wait the full
// DEFAULT_TIMEOUT_MS (30 s) before failing. After EMBED_CIRCUIT_THRESHOLD
// consecutive failures the circuit opens for EMBED_CIRCUIT_OPEN_MS and returns
// [] immediately — callers fall through to keyword-only ranking with no stall.
// The circuit half-opens automatically after the cooldown expires.
const EMBED_CIRCUIT_THRESHOLD = 3;
const EMBED_CIRCUIT_OPEN_MS = 60_000;
// failures: consecutive failed requests; openUntil: epoch ms until which the
// circuit stays open; lastLogAt: epoch ms of the last rate-limited warning.
const embedCircuit = { failures: 0, openUntil: 0, lastLogAt: 0 };

/** True while the circuit is open (cooldown not yet expired). */
function embedCircuitIsOpen() {
  return embedCircuit.openUntil > Date.now();
}

/** A successful embed fully closes the circuit and clears the failure streak. */
function onEmbedSuccess() {
  embedCircuit.failures = 0;
  embedCircuit.openUntil = 0;
}

/**
 * Count one failed embed request; open the circuit once the consecutive
 * failure streak reaches EMBED_CIRCUIT_THRESHOLD. The warning is rate-limited
 * to at most one per open window so a long outage does not spam the log.
 */
function onEmbedFailure() {
  embedCircuit.failures += 1;
  if (embedCircuit.failures < EMBED_CIRCUIT_THRESHOLD) return;
  // Read the clock once so the open window and the log rate-limit gate agree
  // (the original read Date.now() twice, allowing a skew between the two).
  const now = Date.now();
  embedCircuit.openUntil = now + EMBED_CIRCUIT_OPEN_MS;
  if (now - embedCircuit.lastLogAt >= EMBED_CIRCUIT_OPEN_MS) {
    embedCircuit.lastLogAt = now;
    logWarning(
      "memory-embeddings",
      `llm-gateway /embeddings circuit open after ${EMBED_CIRCUIT_THRESHOLD} failures; ` +
        `skipping embed for ${EMBED_CIRCUIT_OPEN_MS / 1000}s — memory search falls back to keyword ranking`,
    );
  }
}
// Environment variable names for the opt-in LLM gateway. ENV_KEY is the gate:
// when it is absent the gateway path is disabled entirely (see
// loadGatewayConfigFromEnv below).
const ENV_KEY = "SF_LLM_GATEWAY_KEY";
const ENV_URL = "SF_LLM_GATEWAY_URL";
const ENV_EMBED_MODEL = "SF_LLM_GATEWAY_EMBED_MODEL";
const ENV_RERANK_MODEL = "SF_LLM_GATEWAY_RERANK_MODEL";
const ENV_EMBED_QUERY_INSTRUCTION = "SF_LLM_GATEWAY_EMBED_QUERY_INSTRUCTION";
// Defaults used when the corresponding env override above is unset.
const DEFAULT_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
const DEFAULT_RERANK_MODEL = "Qwen/Qwen3-Reranker-0.6B";
// Qwen3-Embedding uses asymmetric retrieval: queries are prefixed with a task
// instruction so the model projects them into the "query" region of the embedding
// space, while document texts are sent as-is (no instruction) so they land in
// the "passage" region. Mixing these correctly is critical for retrieval quality.
//
// Format expected by the model: "Instruct: <task>\nQuery: " followed by the
// query text (the gateway appends the text to the instruction). Documents omit
// the instruction entirely.
//
// References: Qwen3-Embedding model card (HuggingFace) §Asymmetric Retrieval.
const DEFAULT_QUERY_INSTRUCTION =
  "Instruct: Retrieve relevant software engineering memories, facts, and project decisions for the given query\nQuery: ";
const KEY_ALIASES = [
ENV_KEY,
"LLM_GATEWAY_API_KEY",
@ -50,7 +94,11 @@ function firstEnvValue(keys) {
return firstEnvEntry(keys)?.value ?? "";
}
/** Read gateway config from env. Returns null when SF_LLM_GATEWAY_KEY is
* missing the gateway path is opt-in and silently absent otherwise. */
* missing the gateway path is opt-in and silently absent otherwise.
*
* `queryInstruction` is the Qwen3-style task instruction prepended to query
* texts during retrieval. Document texts (backfill) are sent without it.
* Override via SF_LLM_GATEWAY_EMBED_QUERY_INSTRUCTION. */
export function loadGatewayConfigFromEnv() {
const keyEntry = firstEnvEntry(KEY_ALIASES);
if (!keyEntry) return null;
@ -59,6 +107,8 @@ export function loadGatewayConfigFromEnv() {
const embeddingModel =
firstEnvValue([ENV_EMBED_MODEL]) || DEFAULT_EMBEDDING_MODEL;
const rerankModel = firstEnvValue([ENV_RERANK_MODEL]) || DEFAULT_RERANK_MODEL;
const queryInstruction =
firstEnvValue([ENV_EMBED_QUERY_INSTRUCTION]) || DEFAULT_QUERY_INSTRUCTION;
return {
url,
apiKey: keyEntry.value,
@ -66,45 +116,70 @@ export function loadGatewayConfigFromEnv() {
urlSource: urlEntry?.key ?? "default",
embeddingModel,
rerankModel,
queryInstruction,
};
}
/** Build an EmbedFn that posts to <url>/embeddings with Bearer auth.
* Returns Float32Array[] in the same order as the input. Throws on HTTP
* errors so the caller (embedMemories) logs and counts as zero. */
export function createGatewayEmbedFn(config) {
* errors so the caller (embedMemories) logs and counts as zero.
* A circuit breaker short-circuits to [] after EMBED_CIRCUIT_THRESHOLD
* consecutive failures so a down/cold gateway never stalls the caller for
* the full 30 s timeout on every call.
*
* `opts.instruction` when set, included as the top-level `instruction`
* field in the request body. Qwen3-Embedding uses this for asymmetric
* retrieval: pass `config.queryInstruction` for query embeddings; omit for
* document/memory backfill so passages land in the correct embedding region. */
export function createGatewayEmbedFn(config, opts) {
return async (texts) => {
if (texts.length === 0) return [];
// Circuit open — fail fast, no network call.
if (embedCircuitIsOpen()) return [];
const controller = new AbortController();
const timeout = setTimeout(
() => controller.abort(),
config.timeoutMs ?? DEFAULT_TIMEOUT_MS,
);
try {
const body = {
model: config.embeddingModel,
input: texts,
};
if (opts?.instruction) {
body.instruction = opts.instruction;
}
const res = await fetch(`${config.url}/embeddings`, {
method: "POST",
headers: {
Authorization: `Bearer ${config.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: config.embeddingModel,
input: texts,
}),
body: JSON.stringify(body),
signal: controller.signal,
});
if (!res.ok) {
const body = await res.text().catch(() => "");
// Throw immediately — the outer catch handles onEmbedFailure once.
throw new Error(
`llm-gateway /embeddings ${res.status}: ${body.slice(0, 200)}`,
);
}
const json = await res.json();
if (!Array.isArray(json.data)) {
// Throw — the outer catch handles onEmbedFailure once.
throw new Error("llm-gateway /embeddings: missing data array");
}
// Sort by index to handle out-of-order responses defensively.
const sorted = [...json.data].sort((a, b) => a.index - b.index);
return sorted.map((d) => Float32Array.from(d.embedding));
const result = sorted.map((d) => Float32Array.from(d.embedding));
onEmbedSuccess();
return result;
} catch (err) {
// Catch AbortError (timeout) and all thrown errors from above — all
// count as a circuit failure. onEmbedFailure is called exactly once
// per failed request regardless of failure mode.
onEmbedFailure();
throw err;
} finally {
clearTimeout(timeout);
}

View file

@ -236,7 +236,12 @@ export function rankMemoriesByEmbedding(
.sort((a, b) => b.combinedScore - a.combinedScore);
}
/** Embed `query` via the configured gateway and return its Float32Array, or
* null when no gateway is configured / the embed call fails. Best-effort. */
* null when no gateway is configured / the embed call fails. Best-effort.
*
* Passes `config.queryInstruction` so Qwen3-Embedding projects the query into
* the correct asymmetric retrieval region of the embedding space. Document
* embeddings (backfill) are created without an instruction the two must be
* consistent for cosine similarity to be meaningful. */
export async function embedQueryViaGateway(query) {
if (!query.trim()) return null;
try {
@ -245,7 +250,9 @@ export async function embedQueryViaGateway(query) {
);
const cfg = loadGatewayConfigFromEnv();
if (!cfg) return null;
const embedFn = createGatewayEmbedFn(cfg);
const embedFn = createGatewayEmbedFn(cfg, {
instruction: cfg.queryInstruction,
});
const vectors = await embedFn([query]);
return vectors[0] ?? null;
} catch (err) {

View file

@ -29,6 +29,128 @@ const FLUSH_RETRY_MAX = 3;
const FLUSH_RETRY_BASE_MS = 1000;
const METRIC_NAME_PATTERN = /^[a-zA-Z_:][a-zA-Z0-9_:]*$/;
// ─── Metrics System Performance Monitoring ──────────────────────────────────
// Module-level bookkeeping for the metrics subsystem's own health. These feed
// getMetricsSystemStats() and are reset on a fresh initMetricsCentral().
let _metricsSystemStartTime = Date.now(); // epoch ms when the subsystem (re)started
let _flushCount = 0; // total flush attempts
let _flushSuccessCount = 0; // flushes that completed without throwing
let _flushFailureCount = 0; // flushes that threw
let _lastFlushDuration = 0; // duration (ms) of the most recent successful flush
let _lastFlushTimestamp = 0; // epoch ms when the most recent successful flush ended
let _totalFlushDuration = 0; // cumulative duration (ms) over successful flushes
/**
 * Snapshot of the metrics subsystem's own health: uptime, flush counters,
 * success rate, flush latency, and whether a DB adapter is attached.
 * Pure read of module-level bookkeeping — no I/O.
 */
export function getMetricsSystemStats() {
  const uptimeMs = Date.now() - _metricsSystemStartTime;
  const successRate =
    _flushCount > 0
      ? `${((_flushSuccessCount / _flushCount) * 100).toFixed(1)}%`
      : "0%";
  const averageFlushDuration =
    _flushSuccessCount > 0
      ? Math.round(_totalFlushDuration / _flushSuccessCount)
      : 0;
  return {
    uptimeMs,
    uptimeSeconds: Math.floor(uptimeMs / 1000),
    flushCount: _flushCount,
    flushSuccessCount: _flushSuccessCount,
    flushFailureCount: _flushFailureCount,
    successRate,
    lastFlushDuration: _lastFlushDuration,
    lastFlushTimestamp: _lastFlushTimestamp,
    averageFlushDuration,
    databaseStatus: _dbAdapter ? "connected" : "disconnected",
  };
}
/**
 * Formatted dashboard of key performance indicators: metrics-system health,
 * cumulative cost/tokens, mean latencies, error counters, and resource gauges.
 * All values are read from the in-memory registry — no I/O.
 */
export function getSystemPerformanceDashboard() {
  const stats = getMetricsSystemStats();
  const registry = getRegistry();
  const metricsSystemHealth = {
    status: stats.databaseStatus,
    successRate: stats.successRate,
    flushCount: stats.flushCount,
    averageFlushDuration: `${stats.averageFlushDuration}ms`,
  };
  const tokens = {
    input: extractMetricValue(registry, "sf_tokens_input_total"),
    output: extractMetricValue(registry, "sf_tokens_output_total"),
  };
  const performance = {
    averageToolExecution: extractMetricHistogramMean(
      registry,
      "sf_tool_execution_duration_ms",
    ),
    averageModelRequest: extractMetricHistogramMean(
      registry,
      "sf_model_request_duration_ms",
    ),
    averageDatabaseQuery: extractMetricHistogramMean(
      registry,
      "sf_database_query_duration_ms",
    ),
  };
  const errors = {
    tool: extractMetricValue(registry, "sf_tool_errors_total"),
    model: extractMetricValue(registry, "sf_model_errors_total"),
    database: extractMetricValue(registry, "sf_database_errors_total"),
    system: extractMetricValue(registry, "sf_system_warnings_total"),
  };
  const resources = {
    activeSessions: extractMetricGaugeValue(
      registry,
      "sf_active_sessions_count",
    ),
    activeAgents: extractMetricGaugeValue(registry, "sf_active_agents_count"),
    concurrentToolCalls: extractMetricGaugeValue(
      registry,
      "sf_concurrent_tool_calls",
    ),
  };
  return {
    uptime: stats.uptimeSeconds,
    metricsSystemHealth,
    cost: extractMetricValue(registry, "sf_cost_total"),
    tokens,
    performance,
    errors,
    resources,
  };
}
/**
 * Sum a counter's values across all label combinations.
 * Returns 0 when the counter is not registered.
 */
function extractMetricValue(registry, metricName) {
  const counter = registry.counters.get(metricName);
  if (!counter) return 0;
  return [...counter.values.values()].reduce((sum, v) => sum + v, 0);
}
/**
 * Mean of a histogram's observations, rounded to the nearest integer.
 * Returns 0 when the histogram is missing or has no observations.
 */
function extractMetricHistogramMean(registry, metricName) {
  const histogram = registry.histograms.get(metricName);
  if (!histogram || histogram.count === 0) {
    return 0;
  }
  return Math.round(histogram.sum / histogram.count);
}
/**
 * Most recently set value of a gauge (last entry in Map insertion order).
 * Returns 0 when the gauge is missing or has no recorded values.
 */
function extractMetricGaugeValue(registry, metricName) {
  const gauge = registry.gauges.get(metricName);
  if (!gauge || gauge.values.size === 0) return 0;
  // Maps iterate in insertion order, so the last yielded value is the
  // most recently inserted one.
  let latest;
  for (const value of gauge.values.values()) {
    latest = value;
  }
  return latest ?? 0;
}
// ─── Metric Types ───────────────────────────────────────────────────────────
class Counter {
@ -266,6 +388,7 @@ class MetricsRegistry {
let _registry = null;
let _flushTimer = null;
let _metricsHealthTimer = null;
let _basePath = "";
let _sessionId = "";
let _dbAdapter = null;
@ -318,7 +441,7 @@ function persistMetricsToDb(registry, sessionId, db) {
);
for (const c of registry.counters.values()) {
for (const [key, value] of c.values) {
const labels = c._parseKey(key);
const labels = _parseLabelKey(key);
insert.run(
c.name,
"counter",
@ -331,7 +454,7 @@ function persistMetricsToDb(registry, sessionId, db) {
}
for (const g of registry.gauges.values()) {
for (const [key, value] of g.values) {
const labels = g._parseKey(key);
const labels = _parseLabelKey(key);
insert.run(
g.name,
"gauge",
@ -361,6 +484,10 @@ function persistMetricsToDb(registry, sessionId, db) {
function flushMetrics() {
if (!_basePath) return;
const flushStartTime = Date.now();
_flushCount++;
try {
const text = getRegistry().buildText();
const path = metricsFilePath(_basePath);
@ -370,8 +497,35 @@ function flushMetrics() {
if (_dbAdapter) {
persistMetricsToDb(getRegistry(), _sessionId, _dbAdapter);
}
// Update performance metrics
_flushSuccessCount++;
_lastFlushDuration = Date.now() - flushStartTime;
_lastFlushTimestamp = Date.now();
_totalFlushDuration += _lastFlushDuration;
_flushFailures = 0;
// Record flush performance metrics
try {
getRegistry()
.counter(
"sf_metrics_flush_success_total",
"Total successful metrics flushes",
[],
)
.inc({}, 1);
getRegistry()
.gauge(
"sf_metrics_flush_duration_ms",
"Duration of last metrics flush in milliseconds",
[],
)
.set({}, _lastFlushDuration);
} catch {
// Best effort - don't let metrics recording break the flush
}
} catch (err) {
_flushFailureCount++;
_flushFailures++;
logWarning(
"metrics-central",
@ -411,6 +565,17 @@ export function initMetricsCentral(basePath, opts = {}) {
_dbAdapter = opts.dbAdapter ?? null;
const interval = opts.flushIntervalMs ?? FLUSH_INTERVAL_MS;
// Reset metrics system stats on fresh init
if (!_flushTimer) {
_metricsSystemStartTime = Date.now();
_flushCount = 0;
_flushSuccessCount = 0;
_flushFailureCount = 0;
_lastFlushDuration = 0;
_lastFlushTimestamp = 0;
_totalFlushDuration = 0;
}
if (_flushTimer) clearInterval(_flushTimer);
_flushTimer = setInterval(flushMetrics, interval);
@ -421,6 +586,64 @@ export function initMetricsCentral(basePath, opts = {}) {
if (_dbAdapter) {
ensureMetricsTable(_dbAdapter);
}
// Start periodic metrics system health reporting
if (!_metricsHealthTimer) {
_metricsHealthTimer = setInterval(() => {
try {
updateMetricsSystemHealth();
} catch {
// Non-fatal
}
}, 300000); // Every 5 minutes
if (_metricsHealthTimer.unref) _metricsHealthTimer.unref();
}
}
/**
 * Publish the metrics subsystem's own health as gauges: uptime, database
 * connection status (labelled by project path), and the number of metric
 * series currently held in memory. Invoked on the periodic health timer
 * started by initMetricsCentral; failures are logged and never thrown.
 */
function updateMetricsSystemHealth() {
  const registry = getRegistry();
  try {
    // Uptime of the metrics subsystem itself (since last init), not the host
    // process.
    const uptime = Math.floor((Date.now() - _metricsSystemStartTime) / 1000);
    registry
      .gauge(
        "sf_metrics_system_uptime_seconds",
        "Metrics system uptime in seconds",
        [],
      )
      .set({}, uptime);
    // 1 when a DB adapter is attached, 0 otherwise.
    registry
      .gauge(
        "sf_metrics_database_status",
        "Database connection status (1=connected, 0=disconnected)",
        ["project_path"],
      )
      .set({ project_path: _basePath || "unknown" }, _dbAdapter ? 1 : 0);
    // Count of distinct metric objects registered in memory (not series per
    // label combination).
    const totalMetrics =
      registry.counters.size + registry.gauges.size + registry.histograms.size;
    registry
      .gauge(
        "sf_metrics_active_count",
        "Number of active metrics in memory",
        [],
      )
      .set({}, totalMetrics);
  } catch (err) {
    // err is untyped in a catch clause — narrow before reading .message, as
    // done elsewhere in this codebase, so a thrown non-Error can't crash the
    // health reporter.
    logWarning(
      "metrics-central",
      `Failed to update health metrics: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
/**
@ -431,6 +654,10 @@ export function stopMetricsCentral() {
clearInterval(_flushTimer);
_flushTimer = null;
}
if (_metricsHealthTimer) {
clearInterval(_metricsHealthTimer);
_metricsHealthTimer = null;
}
// Final flush attempt
flushMetrics();
_basePath = "";
@ -511,6 +738,112 @@ export function recordCost(
recordGauge("sf_cost_last", cost, { unit_id: unitId, model_id: modelId });
}
/**
 * Record tool execution performance. Always records the duration histogram;
 * bumps the error counter only when the execution failed.
 *
 * @param {string} toolName name of the tool
 * @param {number} durationMs execution duration in milliseconds
 * @param {boolean} [isError] whether the execution resulted in an error
 * @param {string} [errorType] type of error if isError is true
 */
export function recordToolExecution(
  toolName,
  durationMs,
  isError = false,
  errorType = "",
) {
  recordHistogram("sf_tool_execution_duration_ms", durationMs);
  if (!isError) return;
  const labels = { tool_name: toolName, error_type: errorType || "unknown" };
  recordCounter("sf_tool_errors_total", labels, 1);
}
/**
 * Record model request performance. Always records the duration histogram;
 * bumps the error counter only when the request failed.
 *
 * @param {string} modelId model identifier
 * @param {number} durationMs request duration in milliseconds
 * @param {boolean} [isError] whether the request resulted in an error
 * @param {string} [errorType] type of error if isError is true
 */
export function recordModelRequest(
  modelId,
  durationMs,
  isError = false,
  errorType = "",
) {
  recordHistogram("sf_model_request_duration_ms", durationMs);
  if (!isError) return;
  const labels = { model_id: modelId, error_type: errorType || "unknown" };
  recordCounter("sf_model_errors_total", labels, 1);
}
/**
 * Record database operation performance. Always records the duration
 * histogram; bumps the error counter only when the operation failed.
 *
 * @param {string} operation database operation name
 * @param {number} durationMs query duration in milliseconds
 * @param {boolean} [isError] whether the operation resulted in an error
 * @param {string} [errorType] type of error if isError is true
 */
export function recordDatabaseOperation(
  operation,
  durationMs,
  isError = false,
  errorType = "",
) {
  recordHistogram("sf_database_query_duration_ms", durationMs);
  if (!isError) return;
  const labels = { operation, error_type: errorType || "unknown" };
  recordCounter("sf_database_errors_total", labels, 1);
}
/**
 * Record a system warning occurrence as a labeled counter increment.
 *
 * @param {string} component system component that issued the warning
 * @param {string} warningType type of warning
 */
export function recordSystemWarning(component, warningType) {
  const labels = { component, warning_type: warningType };
  recordCounter("sf_system_warnings_total", labels, 1);
}
/**
 * Update resource usage gauges. Each gauge is only written when the
 * corresponding field is present, so callers can report partial snapshots.
 *
 * @param {object} resources resource usage data
 * @param {number} [resources.activeSessions] number of active sessions
 * @param {number} [resources.activeAgents] number of active agents
 * @param {number} [resources.concurrentToolCalls] number of concurrent tool calls
 */
export function updateResourceGauges(resources = {}) {
  const { activeSessions, activeAgents, concurrentToolCalls } = resources;
  if (activeSessions !== undefined) {
    recordGauge("sf_active_sessions_count", activeSessions);
  }
  if (activeAgents !== undefined) {
    recordGauge("sf_active_agents_count", activeAgents);
  }
  if (concurrentToolCalls !== undefined) {
    recordGauge("sf_concurrent_tool_calls", concurrentToolCalls);
  }
}
/**
* Get current metrics text in Prometheus format.
*/
@ -673,10 +1006,70 @@ const METRIC_META = {
labels: ["unit_id", "model_id"],
},
// Performance tracking
sf_session_start_duration_ms: {
help: "Session start duration in milliseconds",
buckets: [100, 250, 500, 1000, 2000, 5000],
},
sf_tool_execution_duration_ms: {
help: "Tool execution duration in milliseconds",
buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000],
},
sf_model_request_duration_ms: {
help: "Model request duration in milliseconds",
buckets: [100, 500, 1000, 2500, 5000, 10000, 30000, 60000],
},
sf_database_query_duration_ms: {
help: "Database query duration in milliseconds",
buckets: [1, 5, 10, 25, 50, 100, 250, 500],
},
// Resource usage
sf_active_sessions_count: {
help: "Number of active sessions",
},
sf_active_agents_count: {
help: "Number of active agents",
},
sf_concurrent_tool_calls: {
help: "Number of concurrent tool calls",
},
// Error tracking
sf_tool_errors_total: {
help: "Total tool execution errors",
labels: ["tool_name", "error_type"],
},
sf_model_errors_total: {
help: "Total model request errors",
labels: ["model_id", "error_type"],
},
sf_database_errors_total: {
help: "Total database operation errors",
labels: ["operation", "error_type"],
},
sf_system_warnings_total: {
help: "Total system warnings",
labels: ["component", "warning_type"],
},
// Internal
sf_metrics_flush_failed_total: {
help: "Total metrics flush failures",
},
sf_metrics_flush_success_total: {
help: "Total successful metrics flushes",
},
sf_metrics_flush_duration_ms: {
help: "Duration of last metrics flush in milliseconds",
},
sf_metrics_system_uptime_seconds: {
help: "Metrics system uptime in seconds",
},
sf_metrics_database_status: {
help: "Database connection status (1=connected, 0=disconnected)",
labels: ["project_path"],
},
};
function getMetricMeta(name) {

View file

@ -0,0 +1,266 @@
// session-recorder.js — per-process session lifecycle management
//
// Maintains in-memory state for the active session (id, turn index, pending
// turn row id) and dispatches writes to sf-db.js on hook events. Keeping
// this state here — not in register-hooks.js — lets it be reset cleanly
// across session_switch events and tested in isolation.
//
// Purpose: bridge the Pi/Copilot hook lifecycle into the sf.db session layer
// so every user↔assistant exchange is persisted as a searchable turns row,
// promoted into memory_sources for future LLM extraction + vector embedding,
// and cross-session learning has the structural inputs it needs.
//
// Consumer: register-hooks.js session_start, before_agent_start, agent_end,
// tool_call, session_shutdown hooks.
import { createMemorySource } from "./memory-source-store.js";
import {
insertSessionTurn,
patchTurnResponse,
recordSessionFileTouch,
recordSessionRef,
upsertSession,
} from "./sf-db.js";
// ── Write-class tools whose input.path counts as a file touch ──────────────
const WRITE_TOOL_NAMES = new Set([
"edit_file",
"create_file",
"write_file",
"str_replace_editor",
"str_replace_based_edit_tool",
"rewrite_file",
"insert_content",
"delete_file",
]);
// ── Ref patterns extracted from turn text ──────────────────────────────────
const REF_PATTERNS = [
{ type: "pr", re: /\bPR\s*#(\d+)\b|\bpull.request[/ #]+(\d+)\b/gi },
{ type: "issue", re: /\bissue\s*#(\d+)\b|\bGH-(\d+)\b/gi },
{ type: "commit", re: /\b([0-9a-f]{7,40})\b/g },
{ type: "branch", re: /\borigin\/([^\s"'`]+)\b|\bbranch[: ]+([^\s"'`]+)/gi },
];
// ── Module-level active session state ─────────────────────────────────────
let _sessionId = null;
let _turnIndex = -1;
let _pendingTurnId = null;
/** User message text captured in recordTurnStart — used in promoteTurnToMemorySource. */
let _pendingUserMessage = null;
/**
 * Clear every piece of module-level session state. Called on session_start
 * and session_switch so each host session begins from a clean slate and no
 * stale ids or pending text bleed across session boundaries.
 *
 * Consumer: initSessionRecorder, session_switch handler.
 */
export function resetSessionRecorder() {
  _pendingUserMessage = null;
  _pendingTurnId = null;
  _turnIndex = -1;
  _sessionId = null;
}
/**
 * Initialize the recorder for a new session: reset all per-turn state, then
 * upsert the sessions row that subsequent turns hang off. Safe to call more
 * than once — upsertSession is idempotent. A falsy sessionId leaves the
 * recorder inactive (all later record* calls become no-ops).
 *
 * Consumer: register-hooks.js session_start.
 */
export function initSessionRecorder(
  sessionId,
  { mode, cwd, repo, branch } = {},
) {
  resetSessionRecorder();
  if (!sessionId) return;
  _sessionId = sessionId;
  _turnIndex = 0;
  const sessionRow = {
    sessionId,
    mode: mode ?? "interactive",
    cwd: cwd ?? process.cwd(),
    repo,
    branch,
  };
  upsertSession(sessionRow);
}
/**
 * Record the start of a new turn (user message). The turns row is inserted
 * immediately with the user_message so the record survives even if agent_end
 * never fires (crash or interrupt). Refs mentioned in the message are
 * extracted right away.
 *
 * @returns the DB row id of the new turn (or null when no session is
 *   active) so tool-call handlers can link file touches to this turn.
 *
 * Consumer: register-hooks.js before_agent_start.
 */
export function recordTurnStart(userMessage) {
  if (!_sessionId) return null;
  const message = userMessage ?? null;
  const rowId = insertSessionTurn({
    sessionId: _sessionId,
    turnIndex: _turnIndex,
    userMessage: message,
    ts: new Date().toISOString(),
  });
  _pendingTurnId = rowId;
  _pendingUserMessage = message;
  if (message) extractAndRecordRefs(message, rowId);
  return rowId;
}
/**
 * Complete the pending turn: patch the assistant_response onto the row,
 * extract refs from the response, and promote the finished turn into
 * memory_sources so it can feed `/memory rebuild` (LLM extraction) and
 * vector-embedding backfill. Finally advances the turn index so the next
 * exchange gets a fresh slot.
 *
 * Consumer: register-hooks.js agent_end.
 */
export function recordTurnEnd(assistantResponse) {
  if (!_sessionId) return;
  if (assistantResponse) {
    patchTurnResponse(_sessionId, _turnIndex, assistantResponse);
    extractAndRecordRefs(assistantResponse, _pendingTurnId);
  }
  // Both halves are available here: the user message was captured in
  // recordTurnStart and the assistant text arrives now.
  promoteTurnToMemorySource(_turnIndex, _pendingUserMessage, assistantResponse);
  _turnIndex += 1;
  _pendingTurnId = null;
  _pendingUserMessage = null;
}
/**
 * Record a file path as touched in the current session. No-op when there is
 * no active session, no path, or the tool is not in the write-class
 * allowlist — this keeps session_file_touches an index of mutations only,
 * without requiring a full audit-event scan.
 *
 * Consumer: register-hooks.js tool_call.
 */
export function recordFileTouch(toolName, filePath) {
  const isWriteTool = WRITE_TOOL_NAMES.has(toolName);
  if (!_sessionId || !filePath || !isWriteTool) return;
  recordSessionFileTouch({
    sessionId: _sessionId,
    path: filePath,
    toolName,
    turnId: _pendingTurnId,
    firstSeenAt: new Date().toISOString(),
  });
}
/**
 * Update the session summary (e.g. from a compaction digest) and persist it
 * as a kind="session" memory_source so `/memory rebuild` can extract durable
 * knowledge without an LLM call at compaction time. The summary acts as a
 * semantic handle for memory-pipeline promotion.
 *
 * Consumer: register-hooks.js session_before_compact result.
 */
export function updateSessionSummary(summary) {
  if (!_sessionId || !summary) return;
  upsertSession({ sessionId: _sessionId, summary });
  try {
    // The [session:<id>] prefix enables idempotent detection on re-compact.
    createMemorySource({
      kind: "session",
      uri: null,
      title: `Session ${_sessionId.slice(0, 8)} summary`,
      content: `[session:${_sessionId}] ${summary.slice(0, 1000)}`,
      scope: "project",
      tags: ["session", "summary"],
    });
  } catch {
    /* non-fatal: memory source creation must never block compaction */
  }
}
/**
 * Back-fill the session's repo + branch once git context becomes available
 * after startup (common when cwd is set before the DB is open), so
 * session-by-repo queries work.
 *
 * Consumer: register-hooks.js after ensureDbOpen resolves.
 */
export function patchSessionGitContext(repo, branch) {
  if (!_sessionId) return;
  const patch = {
    sessionId: _sessionId,
    repo: repo ?? null,
    branch: branch ?? null,
  };
  upsertSession(patch);
}
// ── Helpers ────────────────────────────────────────────────────────────────
/**
 * Synthesize a memory_source entry from a completed turn so the conversation
 * text feeds the memory pipeline (`/memory rebuild` LLM extraction, or
 * embedding via runEmbeddingBackfill).
 *
 * Content format: `[turn:<session_id>:<turn_index>] Q: <user_msg> A: <resp>`
 * — the structured prefix enables idempotent detection across rebuilds.
 * Each half is truncated to 400 chars.
 *
 * Silently no-ops when both halves are blank, the session is not
 * initialised, or createMemorySource throws — turn recording must never
 * block the hook pipeline.
 */
function promoteTurnToMemorySource(turnIndex, userMessage, assistantResponse) {
  if (!_sessionId) return;
  const question = (userMessage ?? "").trim();
  const answer = (assistantResponse ?? "").trim();
  if (!question && !answer) return;
  try {
    const parts = [`[turn:${_sessionId}:${turnIndex}]`];
    if (question) parts.push(`Q: ${question.slice(0, 400)}`);
    if (answer) parts.push(`A: ${answer.slice(0, 400)}`);
    createMemorySource({
      kind: "turn",
      uri: null,
      title: `Turn ${_sessionId.slice(0, 8)}:${turnIndex}`,
      content: parts.join(" "),
      scope: "project",
      tags: ["turn", "session"],
    });
  } catch {
    /* non-fatal */
  }
}
/**
 * Scan turn text for PR / issue / commit / branch mentions (REF_PATTERNS)
 * and record each as a session ref linked to the given turn. No-op without
 * an active session or text.
 */
function extractAndRecordRefs(text, turnId) {
  if (!_sessionId || !text) return;
  const createdAt = new Date().toISOString();
  for (const { type, re } of REF_PATTERNS) {
    // The patterns are shared module-level /g regexes — always rewind before
    // re-scanning new text.
    re.lastIndex = 0;
    for (let match = re.exec(text); match !== null; match = re.exec(text)) {
      const value = (match[1] ?? match[2] ?? "").trim();
      // The commit pattern's {7,40} already enforces this; kept as a
      // defensive guard against short hex strings.
      if (type === "commit" && value.length < 7) continue;
      if (!value) continue;
      recordSessionRef({
        sessionId: _sessionId,
        refType: type,
        refValue: value,
        turnId: turnId ?? null,
        createdAt,
      });
    }
  }
}

View file

@ -18,8 +18,18 @@
// The separate `.sf/unit-claims.db` managed by `unit-ownership.ts` is an
// intentionally independent store for cross-worktree claim races and is
// excluded from this invariant.
import { copyFileSync, existsSync, mkdirSync, realpathSync } from "node:fs";
import { dirname } from "node:path";
import {
copyFileSync,
existsSync,
mkdirSync,
readdirSync,
readFileSync,
realpathSync,
statSync,
unlinkSync,
writeFileSync,
} from "node:fs";
import { dirname, join } from "node:path";
import { DatabaseSync } from "node:sqlite";
import { SF_STALE_STATE, SFError } from "./errors.js";
import { getGateIdsForTurn } from "./gate-registry.js";
@ -48,6 +58,9 @@ function normalizeRows(rows) {
return rows.map((r) => normalizeRow(r));
}
const DB_QUERY_TIMEOUT_MS = 30_000;
const DB_BACKUP_MIN_INTERVAL_MS = 15 * 60 * 1000;
const DB_BACKUP_RETENTION = 24;
const DB_FULL_VACUUM_MIN_INTERVAL_MS = 6 * 60 * 60 * 1000;
function createAdapter(rawDb) {
const db = rawDb;
@ -114,7 +127,124 @@ function openRawDb(path) {
loadProvider();
return new DatabaseSync(path);
}
const SCHEMA_VERSION = 47;
/**
 * Quote a value as a SQLite string literal, doubling embedded single quotes
 * per the SQL escaping rule. Used for statements (e.g. VACUUM INTO) that
 * cannot take bound parameters.
 */
function sqliteStringLiteral(value) {
  const escaped = String(value).replaceAll("'", "''");
  return `'${escaped}'`;
}
/**
 * Directory where periodic DB snapshots live: `<db dir>/backups/db`.
 */
function databaseBackupDir(path) {
  const root = dirname(path);
  return join(root, "backups", "db");
}
/**
 * Return the mtime (ms) of the newest `sf.db.*` backup file in dir, or 0
 * when the directory is missing or holds no backups. Files that vanish
 * mid-scan (concurrent pruning) are ignored.
 */
function latestDatabaseBackupMtime(dir) {
  if (!existsSync(dir)) return 0;
  let newest = 0;
  for (const name of readdirSync(dir)) {
    if (!name.startsWith("sf.db.")) continue;
    try {
      const info = statSync(join(dir, name));
      if (info.isFile()) newest = Math.max(newest, info.mtimeMs);
    } catch {
      // Ignore files that disappear during pruning.
    }
  }
  return newest;
}
/**
 * Delete the oldest `sf.db.*` backups beyond DB_BACKUP_RETENTION, newest
 * first. Entirely best-effort: scan and unlink failures are swallowed so
 * retention never blocks DB open.
 */
function pruneDatabaseBackups(dir) {
  if (!existsSync(dir)) return;
  const candidates = [];
  for (const name of readdirSync(dir)) {
    if (!name.startsWith("sf.db.")) continue;
    const file = join(dir, name);
    try {
      const info = statSync(file);
      if (info.isFile()) candidates.push({ file, mtimeMs: info.mtimeMs });
    } catch {
      // Ignore files that disappear during pruning.
    }
  }
  // Newest first; everything past the retention window gets removed.
  candidates.sort((a, b) => b.mtimeMs - a.mtimeMs);
  for (const { file } of candidates.slice(DB_BACKUP_RETENTION)) {
    try {
      unlinkSync(file);
    } catch {
      // Best-effort retention; never block DB open on pruning.
    }
  }
}
/**
 * Path of the maintenance-state JSON file, stored alongside the backups:
 * `<db dir>/backups/db/maintenance.json`.
 */
function databaseMaintenancePath(path) {
  return join(dirname(path), "backups", "db", "maintenance.json");
}
/**
 * Load persisted maintenance metadata. A missing or unparsable file
 * degrades to `{}` — i.e. "no prior maintenance recorded".
 */
function readDatabaseMaintenanceState(path) {
  try {
    const raw = readFileSync(databaseMaintenancePath(path), "utf-8");
    return JSON.parse(raw);
  } catch {
    return {};
  }
}
/**
 * Persist maintenance metadata as pretty-printed JSON. Best-effort: any
 * serialization or write failure is swallowed.
 */
function writeDatabaseMaintenanceState(path, state) {
  try {
    const payload = JSON.stringify(state, null, 2) + "\n";
    writeFileSync(databaseMaintenancePath(path), payload, "utf-8");
  } catch {
    // Best-effort maintenance metadata.
  }
}
/**
 * Take a point-in-time snapshot of the database via `VACUUM INTO`, throttled
 * to at most one per DB_BACKUP_MIN_INTERVAL_MS, then prune old backups.
 * Skipped entirely for in-memory DBs or when SF_DB_BACKUP_DISABLE=1.
 * Failures are logged and never block DB open.
 */
function createDatabaseSnapshot(rawDb, path) {
  const disabled =
    path === ":memory:" || process.env.SF_DB_BACKUP_DISABLE === "1";
  if (disabled) return;
  const dir = databaseBackupDir(path);
  try {
    mkdirSync(dir, { recursive: true });
    // Throttle: skip when a sufficiently recent backup already exists.
    const newest = latestDatabaseBackupMtime(dir);
    if (newest > 0 && Date.now() - newest < DB_BACKUP_MIN_INTERVAL_MS) return;
    // Filesystem-safe timestamp in the backup filename.
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    const backupPath = join(dir, `sf.db.${stamp}`);
    rawDb.exec(`VACUUM INTO ${sqliteStringLiteral(backupPath)}`);
    pruneDatabaseBackups(dir);
  } catch (err) {
    logWarning(
      "sf-db",
      `database snapshot failed: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
/**
 * Run routine SQLite maintenance on open: quick integrity check, passive
 * WAL checkpoint, query-planner optimize, incremental vacuum, and — at most
 * once per DB_FULL_VACUUM_MIN_INTERVAL_MS (tracked in maintenance.json) — a
 * full VACUUM. Skipped for in-memory DBs or SF_DB_MAINTENANCE_DISABLE=1;
 * failures are logged and never block DB open.
 */
function performDatabaseMaintenance(rawDb, path) {
  if (path === ":memory:" || process.env.SF_DB_MAINTENANCE_DISABLE === "1")
    return;
  try {
    // Never run maintenance over a database that fails its integrity check.
    const check = rawDb.prepare("PRAGMA quick_check").get();
    if (check?.quick_check !== "ok") {
      logWarning("sf-db", "database quick_check failed; skipping maintenance");
      return;
    }
    for (const pragma of [
      "PRAGMA wal_checkpoint(PASSIVE)",
      "PRAGMA optimize",
      "PRAGMA incremental_vacuum(128)",
    ]) {
      rawDb.exec(pragma);
    }
    const state = readDatabaseMaintenanceState(path);
    const lastRun =
      typeof state.lastFullVacuumAt === "string"
        ? Date.parse(state.lastFullVacuumAt)
        : 0;
    const fullVacuumDue =
      !Number.isFinite(lastRun) ||
      Date.now() - lastRun >= DB_FULL_VACUUM_MIN_INTERVAL_MS;
    if (fullVacuumDue) {
      rawDb.exec("VACUUM");
      writeDatabaseMaintenanceState(path, {
        ...state,
        lastFullVacuumAt: new Date().toISOString(),
      });
    }
  } catch (err) {
    logWarning(
      "sf-db",
      `database maintenance failed: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
const SCHEMA_VERSION = 49;
function indexExists(db, name) {
return !!db
.prepare(
@ -269,6 +399,125 @@ function ensureSolverEvalTables(db) {
"CREATE INDEX IF NOT EXISTS idx_solver_eval_case_false_complete ON solver_eval_case_results(false_complete, mode)",
);
}
/**
 * Create the session-layer schema: sessions, turns, session_file_touches,
 * session_refs, the turns_fts full-text index with its sync triggers, and
 * all supporting indexes. Idempotent — every statement uses IF NOT EXISTS,
 * so it is safe both in initSchema (fresh DBs) and in the version-48
 * migration.
 *
 * @param {object} db database adapter exposing exec()
 */
function ensureSessionTables(db) {
  // One row per host session; repo/branch/summary are NULL until back-filled.
  db.exec(`
    CREATE TABLE IF NOT EXISTS sessions (
      session_id TEXT PRIMARY KEY,
      trace_id TEXT DEFAULT NULL,
      mode TEXT NOT NULL DEFAULT 'interactive',
      cwd TEXT NOT NULL DEFAULT '',
      repo TEXT DEFAULT NULL,
      branch TEXT DEFAULT NULL,
      summary TEXT DEFAULT NULL,
      summary_count INTEGER NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL
    )
  `);
  // One row per user↔assistant exchange; UNIQUE(session_id, turn_index)
  // lets insertSessionTurn upsert by position within the session.
  db.exec(`
    CREATE TABLE IF NOT EXISTS turns (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL REFERENCES sessions(session_id) ON DELETE CASCADE,
      turn_index INTEGER NOT NULL,
      user_message TEXT,
      assistant_response TEXT,
      ts TEXT NOT NULL,
      UNIQUE(session_id, turn_index)
    )
  `);
  // Write-class tool touches, deduped per (session, path) — only the first
  // touch's tool_name/first_seen_at survive.
  db.exec(`
    CREATE TABLE IF NOT EXISTS session_file_touches (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL REFERENCES sessions(session_id) ON DELETE CASCADE,
      path TEXT NOT NULL,
      tool_name TEXT DEFAULT NULL,
      turn_id INTEGER DEFAULT NULL REFERENCES turns(id),
      first_seen_at TEXT NOT NULL,
      UNIQUE(session_id, path)
    )
  `);
  // PR/issue/commit/branch mentions, deduped per (session, type, value).
  db.exec(`
    CREATE TABLE IF NOT EXISTS session_refs (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL REFERENCES sessions(session_id) ON DELETE CASCADE,
      ref_type TEXT NOT NULL,
      ref_value TEXT NOT NULL,
      turn_id INTEGER DEFAULT NULL REFERENCES turns(id),
      created_at TEXT NOT NULL,
      UNIQUE(session_id, ref_type, ref_value)
    )
  `);
  // FTS5 external-content table over turns for keyword recall.
  // content_rowid links to turns.id; triggers below keep it in sync.
  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS turns_fts USING fts5(
      user_message,
      assistant_response,
      content='turns',
      content_rowid='id'
    )
  `);
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS turns_fts_insert AFTER INSERT ON turns BEGIN
      INSERT INTO turns_fts(rowid, user_message, assistant_response)
      VALUES (new.id, new.user_message, new.assistant_response);
    END
  `);
  // External-content FTS updates require a 'delete' marker row for the old
  // values before inserting the new ones.
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS turns_fts_update AFTER UPDATE ON turns BEGIN
      INSERT INTO turns_fts(turns_fts, rowid, user_message, assistant_response)
      VALUES ('delete', old.id, old.user_message, old.assistant_response);
      INSERT INTO turns_fts(rowid, user_message, assistant_response)
      VALUES (new.id, new.user_message, new.assistant_response);
    END
  `);
  db.exec(`
    CREATE TRIGGER IF NOT EXISTS turns_fts_delete AFTER DELETE ON turns BEGIN
      INSERT INTO turns_fts(turns_fts, rowid, user_message, assistant_response)
      VALUES ('delete', old.id, old.user_message, old.assistant_response);
    END
  `);
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_sessions_created ON sessions(created_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_sessions_repo ON sessions(repo, created_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_turns_session ON turns(session_id, turn_index)",
  );
  db.exec("CREATE INDEX IF NOT EXISTS idx_turns_ts ON turns(ts DESC)");
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_session_file_touches_session ON session_file_touches(session_id, first_seen_at DESC)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_session_file_touches_path ON session_file_touches(path, session_id)",
  );
  db.exec(
    "CREATE INDEX IF NOT EXISTS idx_session_refs_session ON session_refs(session_id, created_at DESC)",
  );
}
/**
 * Create the session_snapshots table (checkpoints taken before irreversible
 * operations) and its lookup index. Idempotent — both statements use
 * IF NOT EXISTS, so this is safe on fresh databases and in the version-49
 * migration alike.
 *
 * @param {object} db database adapter exposing exec()
 */
function ensureSessionSnapshotTable(db) {
  const createTable = `
    CREATE TABLE IF NOT EXISTS session_snapshots (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      -- Session that triggered this checkpoint. FK to sessions(session_id).
      session_id TEXT NOT NULL,
      -- Zero-based counter within the session (first snapshot = 0).
      snapshot_index INTEGER NOT NULL DEFAULT 0,
      -- Optional git stash ref so the snapshot can be restored exactly.
      -- NULL when the working tree had no changes to stash.
      git_stash_ref TEXT,
      -- Free-text label for the snapshot (e.g. "before migration deploy").
      label TEXT,
      ts TEXT NOT NULL,
      UNIQUE(session_id, snapshot_index)
    )
  `;
  const createIndex =
    "CREATE INDEX IF NOT EXISTS idx_session_snapshots_session ON session_snapshots(session_id, snapshot_index)";
  for (const statement of [createTable, createIndex]) {
    db.exec(statement);
  }
}
function ensureHeadlessRunTables(db) {
db.exec(`
CREATE TABLE IF NOT EXISTS headless_runs (
@ -1038,6 +1287,8 @@ function initSchema(db, fileBacked) {
ensureScheduleTables(db);
ensureSolverEvalTables(db);
ensureHeadlessRunTables(db);
ensureSessionTables(db);
ensureSessionSnapshotTable(db);
ensureUokMessageTables(db);
ensureSpecSchemaTables(db);
ensureTaskFrontmatterColumns(db);
@ -2592,9 +2843,7 @@ function migrateSchema(db) {
.all()
.map((c) => c.name);
if (cols.includes("superseded_by")) {
db.exec(
"ALTER TABLE validation_runs DROP COLUMN superseded_by",
);
db.exec("ALTER TABLE validation_runs DROP COLUMN superseded_by");
}
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
@ -2603,6 +2852,58 @@ function migrateSchema(db) {
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 48) {
// Session layer: create tables, backfill from existing headless_runs and
// audit_turn_index so historical data is queryable from day one.
// Message text will be NULL for backfilled turns — it was never stored.
ensureSessionTables(db);
// Backfill: one session per headless run.
db.exec(`
INSERT OR IGNORE INTO sessions (session_id, trace_id, mode, cwd, created_at, updated_at)
SELECT run_id, NULL, 'headless', '', created_at, updated_at
FROM headless_runs
`);
// Backfill: one session per distinct trace_id in audit_turn_index.
// Reconstruct created_at/updated_at from the min/max timestamps.
db.exec(`
INSERT OR IGNORE INTO sessions (session_id, trace_id, mode, cwd, created_at, updated_at)
SELECT trace_id, trace_id, 'interactive',
'', MIN(first_ts), MAX(last_ts)
FROM audit_turn_index
GROUP BY trace_id
`);
// Backfill: one turn row per (trace_id, turn_id) in audit_turn_index.
// turn_index derived from row order within trace; message text is NULL.
db.exec(`
INSERT OR IGNORE INTO turns (session_id, turn_index, user_message, assistant_response, ts)
SELECT
trace_id,
ROW_NUMBER() OVER (PARTITION BY trace_id ORDER BY first_ts) - 1,
NULL, NULL,
first_ts
FROM audit_turn_index
`);
// Rebuild FTS index from any turns that have text.
// None from backfill yet, but required so the FTS table is consistent.
db.exec(`INSERT INTO turns_fts(turns_fts) VALUES ('rebuild')`);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 48,
":applied_at": new Date().toISOString(),
});
}
if (currentVersion < 49) {
// Add session_snapshots table — checkpoints before irreversible ops.
// Safe to call on fresh DBs too (CREATE TABLE IF NOT EXISTS).
ensureSessionSnapshotTable(db);
db.prepare(
"INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)",
).run({
":version": 49,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -2655,6 +2956,8 @@ export function openDatabase(path) {
const fileBacked = path !== ":memory:";
try {
initSchema(adapter, fileBacked);
createDatabaseSnapshot(rawDb, path);
performDatabaseMaintenance(rawDb, path);
} catch (err) {
// Corrupt freelist: DDL fails with "malformed" but VACUUM can rebuild.
// Attempt VACUUM recovery before giving up (see #2519).
@ -6591,6 +6894,246 @@ export function listHeadlessRuns(limit = 20) {
.all({ ":limit": Math.max(1, Math.min(100, Number(limit) || 20)) })
.map(headlessRunFromRow);
}
/**
 * Upsert a session row. Creates on first call; on later calls fills in
 * trace_id/repo/branch/summary when newly provided (COALESCE keeps prior
 * values) and always refreshes updated_at. summary_count increments each
 * time a non-NULL summary arrives. Safe to call on every session_start and
 * again when context becomes available (e.g. after git detection).
 *
 * Purpose: establish the session entity that all turns, file-touches, and
 * refs hang off — the missing structural layer for cross-session learning.
 *
 * Consumer: session-recorder.js on session_start and session_shutdown hooks.
 */
export function upsertSession(entry) {
  if (!currentDb) return;
  const timestamp = new Date().toISOString();
  const params = {
    ":session_id": entry.sessionId,
    ":trace_id": entry.traceId ?? null,
    ":mode": entry.mode ?? "interactive",
    ":cwd": entry.cwd ?? "",
    ":repo": entry.repo ?? null,
    ":branch": entry.branch ?? null,
    ":summary": entry.summary ?? null,
    ":now": timestamp,
  };
  const statement = currentDb.prepare(`INSERT INTO sessions
      (session_id, trace_id, mode, cwd, repo, branch, summary, summary_count, created_at, updated_at)
      VALUES (:session_id, :trace_id, :mode, :cwd, :repo, :branch, :summary, 0, :now, :now)
      ON CONFLICT(session_id) DO UPDATE SET
        trace_id = COALESCE(excluded.trace_id, sessions.trace_id),
        repo = COALESCE(excluded.repo, sessions.repo),
        branch = COALESCE(excluded.branch, sessions.branch),
        summary = COALESCE(excluded.summary, sessions.summary),
        summary_count = CASE WHEN excluded.summary IS NOT NULL
                             THEN sessions.summary_count + 1
                             ELSE sessions.summary_count END,
        updated_at = excluded.updated_at`);
  statement.run(params);
}
/**
 * Insert (or upsert by position) a turn row for a session. On conflict with
 * an existing (session_id, turn_index) pair, COALESCE fills in whichever
 * half of the exchange is newly provided without clobbering stored text.
 *
 * Purpose: record every user↔assistant exchange so turn text is searchable
 * via turns_fts and promotable into the memory pipeline.
 *
 * Consumer: session-recorder.js on before_agent_start (user_message) and
 * agent_end (assistant_response patch).
 *
 * @returns the new turn's integer row id (null when no DB is open) so the
 *   caller can link subsequent file-touches and refs to it.
 */
export function insertSessionTurn(entry) {
  if (!currentDb) return null;
  const params = {
    ":session_id": entry.sessionId,
    ":turn_index": entry.turnIndex,
    ":user_message": entry.userMessage ?? null,
    ":assistant_response": entry.assistantResponse ?? null,
    ":ts": entry.ts ?? new Date().toISOString(),
  };
  const statement = currentDb.prepare(`INSERT INTO turns
      (session_id, turn_index, user_message, assistant_response, ts)
      VALUES (:session_id, :turn_index, :user_message, :assistant_response, :ts)
      ON CONFLICT(session_id, turn_index) DO UPDATE SET
        user_message = COALESCE(excluded.user_message, turns.user_message),
        assistant_response = COALESCE(excluded.assistant_response, turns.assistant_response)`);
  const result = statement.run(params);
  return result.lastInsertRowid ?? null;
}
/**
 * Patch the assistant_response onto an existing turn row. The WHERE clause
 * only matches rows whose response is still NULL, so an already-stored
 * response is never overwritten.
 *
 * Purpose: complete the turn record so both halves of the exchange are
 * searchable and promotable as a unit.
 *
 * Consumer: session-recorder.js on agent_end.
 */
export function patchTurnResponse(sessionId, turnIndex, assistantResponse) {
  if (!currentDb) return;
  const params = {
    ":resp": assistantResponse,
    ":sid": sessionId,
    ":idx": turnIndex,
  };
  currentDb
    .prepare(`UPDATE turns SET assistant_response = :resp
      WHERE session_id = :sid AND turn_index = :idx AND assistant_response IS NULL`)
    .run(params);
}
/**
 * Record that a file path was touched in a session. UNIQUE(session_id, path)
 * plus INSERT OR IGNORE collapses repeated touches to one row — only the
 * first touch's first_seen_at and tool_name are retained.
 *
 * Purpose: enable "which files did I touch last session?" and cross-session
 * file-history queries without storing a full audit log per touch.
 *
 * Consumer: session-recorder.js on tool_call for write-class tools.
 */
export function recordSessionFileTouch(entry) {
  if (!currentDb) return;
  const params = {
    ":session_id": entry.sessionId,
    ":path": entry.path,
    ":tool_name": entry.toolName ?? null,
    ":turn_id": entry.turnId ?? null,
    ":first_seen_at": entry.firstSeenAt ?? new Date().toISOString(),
  };
  currentDb
    .prepare(`INSERT OR IGNORE INTO session_file_touches
      (session_id, path, tool_name, turn_id, first_seen_at)
      VALUES (:session_id, :path, :tool_name, :turn_id, :first_seen_at)`)
    .run(params);
}
/**
 * Record a PR / issue / commit / branch ref mentioned in a session.
 * Idempotent via UNIQUE(session_id, ref_type, ref_value) + INSERT OR IGNORE.
 *
 * Purpose: make sessions queryable by the work items they touched so
 * "what session created PR #42?" is a single indexed lookup.
 *
 * Consumer: session-recorder.js when refs are detected in turn text.
 */
export function recordSessionRef(entry) {
  if (!currentDb) return;
  const params = {
    ":session_id": entry.sessionId,
    ":ref_type": entry.refType,
    ":ref_value": entry.refValue,
    ":turn_id": entry.turnId ?? null,
    ":created_at": entry.createdAt ?? new Date().toISOString(),
  };
  currentDb
    .prepare(`INSERT OR IGNORE INTO session_refs
      (session_id, ref_type, ref_value, turn_id, created_at)
      VALUES (:session_id, :ref_type, :ref_value, :turn_id, :created_at)`)
    .run(params);
}
/**
 * Full-text search across turns via the FTS5 turns_fts virtual table.
 * Returns matching turns with their session metadata, ordered by FTS rank.
 *
 * Purpose: power cross-session keyword recall — "what did I ask about auth?",
 * "find sessions where I worked on retry handling".
 *
 * Consumer: sf memory search, context-injection, and /session search command.
 *
 * @param {string} query FTS5 match expression
 * @param {number} [limit] max rows, clamped to [1, 100]; non-numeric input
 *   falls back to the default of 20 (matching listHeadlessRuns' guard)
 */
export function searchSessionTurns(query, limit = 20) {
  if (!currentDb) return [];
  // Number(limit) || 20 prevents a NaN from reaching the LIMIT clause.
  const cappedLimit = Math.max(1, Math.min(100, Number(limit) || 20));
  return currentDb
    .prepare(`SELECT t.id, t.session_id, t.turn_index, t.ts,
      t.user_message, t.assistant_response,
      s.mode, s.cwd, s.repo, s.branch
      FROM turns_fts
      JOIN turns t ON turns_fts.rowid = t.id
      JOIN sessions s ON t.session_id = s.session_id
      WHERE turns_fts MATCH :query
      ORDER BY rank
      LIMIT :limit`)
    .all({ ":query": query, ":limit": cappedLimit });
}
/**
 * List recent sessions with their turn count and touched-file count, most
 * recently updated first. Useful for /session list and for memory-pipeline
 * ingestion sweeps.
 *
 * Consumer: trajectory-command, memory-ingest, doctor checks.
 *
 * @param {number} [limit] max rows, clamped to [1, 100]; non-numeric input
 *   falls back to the default of 20 (matching listHeadlessRuns' guard)
 */
export function listRecentSessions(limit = 20) {
  if (!currentDb) return [];
  // Number(limit) || 20 prevents a NaN from reaching the LIMIT clause.
  const cappedLimit = Math.max(1, Math.min(100, Number(limit) || 20));
  return currentDb
    .prepare(`SELECT s.session_id, s.mode, s.cwd, s.repo, s.branch,
      s.summary, s.created_at, s.updated_at,
      COUNT(DISTINCT t.id) AS turn_count,
      COUNT(DISTINCT f.id) AS file_count
      FROM sessions s
      LEFT JOIN turns t ON t.session_id = s.session_id
      LEFT JOIN session_file_touches f ON f.session_id = s.session_id
      GROUP BY s.session_id
      ORDER BY s.updated_at DESC
      LIMIT :limit`)
    .all({ ":limit": cappedLimit });
}
/**
* Record a snapshot checkpoint before an irreversible operation. Idempotent
* within a session: the snapshot_index is auto-incremented from the current
* max so callers can create multiple checkpoints per session without
* coordination.
*
* Purpose: give session_snapshots a first-class row so recovery paths and
* irreversible-ops gates can reference the stash ref and label without
* parsing free-text.
*
* Consumer: irreversible-ops safety gate (session_before_compact, future
* verify steps that call git stash before destructive actions).
*
* @param {{ sessionId: string, gitStashRef?: string|null, label?: string|null, ts?: string }} args
* @returns {number} The row id of the inserted snapshot (or 0 on failure).
*/
export function insertSessionSnapshot(args) {
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  // Next free index for this session; the -1 sentinel makes the first
  // snapshot land at index 0.
  const row = currentDb
    .prepare(
      "SELECT COALESCE(MAX(snapshot_index), -1) + 1 AS nxt FROM session_snapshots WHERE session_id = :sid",
    )
    .get({ ":sid": args.sessionId });
  const nextIndex = row ? Number(row["nxt"]) : 0;
  const result = currentDb
    .prepare(`INSERT INTO session_snapshots
         (session_id, snapshot_index, git_stash_ref, label, ts)
         VALUES (:sid, :idx, :ref, :label, :ts)`)
    .run({
      ":sid": args.sessionId,
      ":idx": nextIndex,
      ":ref": args.gitStashRef ?? null,
      ":label": args.label ?? null,
      ":ts": args.ts ?? new Date().toISOString(),
    });
  // node:sqlite reports the inserted rowid directly from run(); reading it
  // here avoids the original read-back SELECT, which could return a different
  // row's id if another writer inserted between the INSERT and the SELECT.
  return result ? Number(result.lastInsertRowid) : 0;
}
/**
* List all snapshots for a session, ordered by snapshot_index ascending.
*
* Purpose: let recovery tooling enumerate available restore points for a
* session and present them to the operator before a rollback.
*
* Consumer: future /session snapshots command and irreversible-ops skill.
*
* @param {string} sessionId
* @returns {Array<{id:number, session_id:string, snapshot_index:number, git_stash_ref:string|null, label:string|null, ts:string}>}
*/
export function listSessionSnapshots(sessionId) {
  if (!currentDb) return [];
  const stmt = currentDb.prepare(
    "SELECT * FROM session_snapshots WHERE session_id = :sid ORDER BY snapshot_index ASC",
  );
  return stmt.all({ ":sid": sessionId });
}
/**
* INSERT OR REPLACE a quality_gates row. Used by milestone-validation-gates.ts
* to persist milestone-level (MV*) gate outcomes after validate-milestone runs.

View file

@ -31,6 +31,9 @@ function normalize(name) {
const UNIT_TYPE_SKILL_MANIFEST = {
// Milestone-level planning / meta flows — predictable skill sets.
"research-milestone": [
"autoresearch",
"human-writing",
"sf-wiki",
"write-docs",
"write-milestone-brief",
"decompose-into-slices",
@ -40,6 +43,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"observability",
],
"plan-milestone": [
"human-writing",
"sf-wiki",
"write-milestone-brief",
"decompose-into-slices",
"design-an-interface",
@ -50,6 +55,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"verify-before-complete",
],
"roadmap-meeting": [
"human-writing",
"sf-wiki",
"write-milestone-brief",
"decompose-into-slices",
"design-an-interface",
@ -60,6 +67,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"verify-before-complete",
],
"complete-milestone": [
"human-writing",
"sf-wiki",
"verify-before-complete",
"write-docs",
"handoff",
@ -78,6 +87,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"observability",
],
"reassess-roadmap": [
"human-writing",
"sf-wiki",
"decompose-into-slices",
"grill-me",
"write-milestone-brief",
@ -86,6 +97,9 @@ const UNIT_TYPE_SKILL_MANIFEST = {
],
// Slice-level research / planning.
"research-slice": [
"autoresearch",
"human-writing",
"sf-wiki",
"write-docs",
"decompose-into-slices",
"design-an-interface",
@ -94,6 +108,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"observability",
],
"plan-slice": [
"human-writing",
"sf-wiki",
"decompose-into-slices",
"design-an-interface",
"grill-me",
@ -103,6 +119,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"verify-before-complete",
],
"refine-slice": [
"human-writing",
"sf-wiki",
"decompose-into-slices",
"design-an-interface",
"grill-me",
@ -112,6 +130,8 @@ const UNIT_TYPE_SKILL_MANIFEST = {
"verify-before-complete",
],
"replan-slice": [
"human-writing",
"sf-wiki",
"decompose-into-slices",
"grill-me",
"design-an-interface",

View file

@ -7,10 +7,18 @@
* Consumer: skill loader, auto-skill creation, and model context assembly.
*/
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
import { join } from "node:path";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
const SKILL_FILENAME = "SKILL.md";
const USER_SKILL_DIR = join(process.env.HOME ?? "", ".sf", "skills");
const BUNDLED_SKILL_DIR = join(
dirname(fileURLToPath(import.meta.url)),
"..",
"..",
"..",
"skills",
);
/**
* Find all skill directories under a base path.
@ -35,9 +43,17 @@ export function discoverSkillDirs(basePath) {
/**
* Discover skills from all sources: project, user, and built-in.
*/
export function discoverAllSkills(projectPath) {
export function discoverAllSkills(projectPath, options = {}) {
const sources = [];
// Bundled SF skills
if (options.includeBundled && existsSync(BUNDLED_SKILL_DIR)) {
const bundledSkills = discoverSkillDirsInRoot(BUNDLED_SKILL_DIR);
for (const s of bundledSkills) {
sources.push({ ...s, source: "bundled" });
}
}
// Project skills
if (projectPath) {
const projectSkills = discoverSkillDirs(projectPath);
@ -59,6 +75,22 @@ export function discoverAllSkills(projectPath) {
return sources;
}
/**
 * Enumerate immediate subdirectories of `skillRoot` that contain a SKILL.md.
 *
 * Returns `{ name, path, skillFile }` records; unreadable entries (dangling
 * symlinks, permission errors) are skipped instead of aborting discovery —
 * the original statSync call would throw and lose every bundled skill.
 */
function discoverSkillDirsInRoot(skillRoot) {
  if (!existsSync(skillRoot)) return [];
  const dirs = [];
  for (const entry of readdirSync(skillRoot)) {
    const full = join(skillRoot, entry);
    let isDir = false;
    try {
      isDir = statSync(full).isDirectory();
    } catch {
      // Dangling symlink or unreadable entry: skip it, keep discovering.
      continue;
    }
    if (!isDir) continue;
    const skillFile = join(full, SKILL_FILENAME);
    if (existsSync(skillFile)) {
      dirs.push({ name: entry, path: full, skillFile });
    }
  }
  return dirs;
}
/**
* Read the raw content of a skill file.
*/

View file

@ -18,8 +18,8 @@ import {
*
* Returns array of skill records with validation errors attached.
*/
export function loadSkills(projectPath) {
const discovered = discoverAllSkills(projectPath);
export function loadSkills(projectPath, options = {}) {
const discovered = discoverAllSkills(projectPath, options);
const skills = [];
for (const { name, path, source } of discovered) {
@ -47,7 +47,10 @@ export function loadSkills(projectPath) {
continue;
}
const validation = validateSkillFrontmatter(parsed.frontmatter);
const validation =
source === "bundled"
? validateBundledSkillFrontmatter(parsed.frontmatter)
: validateSkillFrontmatter(parsed.frontmatter);
if (!validation.valid) {
skills.push({
name,
@ -61,6 +64,15 @@ export function loadSkills(projectPath) {
}
const record = buildSkillRecord(path, parsed.frontmatter, parsed.body);
if (
source === "bundled" &&
parsed.frontmatter["user-invocable"] === undefined
) {
record.userInvocable = !isWorkflowOnlyBundledSkill(
parsed.frontmatter,
parsed.body,
);
}
skills.push({
...record,
source,
@ -72,6 +84,28 @@ export function loadSkills(projectPath) {
return skills;
}
/**
 * Relaxed frontmatter validation for bundled skills: only `name` and
 * `description` must be present non-empty strings.
 */
function validateBundledSkillFrontmatter(frontmatter) {
  const errors = [];
  const requireString = (field) => {
    const value = frontmatter[field];
    if (!value || typeof value !== "string") {
      errors.push(`Missing or invalid '${field}' field`);
    }
  };
  requireString("name");
  requireString("description");
  return { valid: errors.length === 0, errors };
}
/**
 * Heuristic: a bundled skill is workflow-only when its description or body
 * carries one of the known workflow-only marker phrases (case-insensitive).
 */
function isWorkflowOnlyBundledSkill(frontmatter, body) {
  const haystack = [frontmatter.description ?? "", body ?? ""]
    .join("\n")
    .toLowerCase();
  const markers = ["use inside autonomous workflow", "this is a workflow skill"];
  return markers.some((marker) => haystack.includes(marker));
}
/**
* Get skills that are safe for the current permission profile.
*/
@ -88,6 +122,20 @@ export function getPermittedSkills(skills, activeProfile) {
});
}
/**
 * Get SF runtime skills that should appear in the user-facing /skills catalog.
 *
 * Only valid, user-invocable skills from the bundled source qualify; project
 * and workflow-only skills stay available to routing without being advertised
 * as SF runtime skills.
 *
 * Consumer: /skills list mode in the SF command surface.
 */
export function getUserInvocableSkills(skills) {
  const isCatalogVisible = (skill) =>
    skill.source === "bundled" && skill.valid && skill.userInvocable;
  return skills.filter(isCatalogVisible);
}
/**
* Get skills that can be invoked by the model for a given work mode.
*/

View file

@ -1,5 +1,7 @@
// SF Extension — State Derivation
// DB-primary state derivation with filesystem fallback for unmigrated projects.
// DB-primary state derivation with explicit recovery guidance when DB-backed
// projects cannot be opened. Legacy filesystem parsing remains available only
// for projects that have not yet attempted DB bootstrap in this process.
// Pure TypeScript, zero Pi dependencies.
import { existsSync, readdirSync, readFileSync } from "node:fs";
import { join, resolve } from "node:path";
@ -142,6 +144,44 @@ const CACHE_TTL_MS = 5000;
let _stateCache = null;
// ── Telemetry counters for derive-path observability ────────────────────────
const _telemetry = { dbDeriveCount: 0, markdownDeriveCount: 0 };
// Zeroed requirement tally reused by recovery/blocked states; frozen so the
// shared instance cannot be mutated by any consumer.
const EMPTY_REQUIREMENT_COUNTS = Object.freeze({
  active: 0,
  validated: 0,
  deferred: 0,
  outOfScope: 0,
  blocked: 0,
  total: 0,
});
/**
 * True when the project carries legacy SF artifacts on disk: any milestone
 * directory, or any of the well-known .sf root planning files.
 */
function hasLegacyRuntimeArtifacts(basePath) {
  if (findMilestoneIds(basePath).length > 0) return true;
  const rootFiles = [
    "PROJECT",
    "REQUIREMENTS",
    "DECISIONS",
    "KNOWLEDGE",
    "RUNTIME",
    "STATE",
  ];
  return rootFiles.some((name) => resolveSfRootFile(basePath, name) !== null);
}
// Hard-blocked derived state returned when a DB-backed project's database
// cannot be opened. Deliberately carries no milestone/slice/task data:
// markdown is not treated as authoritative once DB bootstrap was attempted.
function buildDbRecoveryRequiredState() {
  return {
    activeMilestone: null,
    activeSlice: null,
    activeTask: null,
    phase: "blocked",
    recentDecisions: [],
    blockers: [
      "DB-backed SF state is unavailable. Runtime does not fall back to markdown authority after DB bootstrap fails.",
    ],
    // Points the operator at the two supported recovery paths.
    nextAction:
      "Run `sf recover` to rebuild DB state from disk, or `sf migrate` for a legacy markdown-only project.",
    registry: [],
    requirements: EMPTY_REQUIREMENT_COUNTS,
    progress: { milestones: { done: 0, total: 0 } },
  };
}
/**
* Invalidate the deriveState() cache. Call this whenever planning files on disk
* may have changed (unit completion, merges, file writes).
@ -241,10 +281,18 @@ export async function deriveState(basePath) {
// the DB simply hasn't been opened yet (e.g. during before_agent_start
// context injection which runs before any tool invocation opens the DB).
if (wasDbOpenAttempted()) {
logWarning(
"state",
"DB unavailable — using filesystem state derivation (degraded mode)",
);
if (hasLegacyRuntimeArtifacts(basePath)) {
logWarning(
"state",
"DB unavailable for a project with legacy SF artifacts — refusing runtime markdown fallback; run sf recover or sf migrate",
);
result = buildDbRecoveryRequiredState();
stopTimer({ phase: result.phase, milestone: result.activeMilestone?.id });
debugCount("deriveStateCalls");
_stateCache = { basePath, result, timestamp: Date.now() };
return result;
}
logWarning("state", "DB unavailable — using filesystem state derivation");
}
result = await _deriveStateImpl(basePath);
_telemetry.markdownDeriveCount++;

View file

@ -0,0 +1,137 @@
/**
* Steerable Autonomous Extension - Copilot Auto-style controls
*
* Provides Shift+Tab interface for steering and asking questions
* during autonomous execution, similar to Copilot Auto.
* Also integrates Ctrl+Y for YOLO mode (bypass git prompts).
*/
import {
handleSteerableModeKey,
SteerableAutonomousPanel,
} from "./steerable-autonomous-panel.js";
/**
 * Register the steerable-autonomous surfaces on the extension API:
 * Shift+Tab / /steer panel toggle, Ctrl+Y YOLO toggle, /autonomous-status,
 * and lifecycle listeners that track autonomous-mode state.
 *
 * The panel toggle logic was previously duplicated verbatim between the
 * Shift+Tab shortcut and the /steer command (and teardown triplicated);
 * both now share one helper.
 */
export default function steerableAutonomousExtension(api) {
  let panel = null;
  let isAutonomousActive = false;

  // Tear down the panel if it is currently shown.
  const closePanel = () => {
    if (panel) {
      panel.hide();
      panel = null;
    }
  };

  // Shared Shift+Tab / /steer behavior: refuse outside autonomous mode,
  // otherwise toggle the control panel.
  const togglePanel = async (ctx) => {
    if (!isAutonomousActive) {
      ctx.ui.notify(
        "Autonomous mode not active - use /autonomous to start",
        "info",
      );
      return;
    }
    if (panel) {
      closePanel();
    } else {
      panel = new SteerableAutonomousPanel(ctx);
      await panel.show();
    }
  };

  // Reset autonomous tracking and close any stale panel at session start.
  api.on("session_start", async () => {
    isAutonomousActive = false;
    closePanel();
  });

  api.registerShortcut("shift+tab", {
    description: "Open/close steerable autonomous panel",
    handler: async (_event, ctx) => togglePanel(ctx),
  });

  api.registerShortcut("ctrl+y", {
    description: "Toggle YOLO mode (bypass git prompts)",
    handler: async (_event, ctx) => {
      if (ctx.settingsManager && ctx.settingsManager.toggleYOLO) {
        const enabled = ctx.settingsManager.toggleYOLO();
        ctx.ui.notify(
          `🚀 YOLO mode ${enabled ? "ON" : "OFF"} - safe-git prompts ${enabled ? "disabled" : "enabled"}`,
          enabled ? "success" : "info",
        );
      } else {
        // No settings manager exposed: best-effort notification only.
        ctx.ui.notify(
          "🚀 YOLO mode - safe-git prompts disabled for this session",
          "success",
        );
      }
    },
  });

  // Slash-command twin of the Shift+Tab shortcut.
  api.registerCommand("steer", {
    description: "Open steerable autonomous panel (Shift+Tab)",
    handler: async (_args, ctx) => togglePanel(ctx),
  });

  // Lifecycle listeners keep isAutonomousActive in sync with the runtime.
  api.on("autonomous_start", async (_, ctx) => {
    isAutonomousActive = true;
    ctx.ui.notify("🤖 Autonomous mode active - Shift+Tab to steer", "info");
  });

  api.on("autonomous_stop", async (_, ctx) => {
    isAutonomousActive = false;
    closePanel();
    ctx.ui.notify("⏹️ Autonomous mode stopped", "info");
  });

  api.registerCommand("autonomous-status", {
    description: "Show current autonomous mode status and retry attempts",
    handler: async (_args, ctx) => {
      if (!isAutonomousActive) {
        ctx.ui.notify("Autonomous mode not active", "info");
        return;
      }
      // Placeholder status until the autonomous runtime exposes real telemetry.
      const status = {
        currentPhase: "research",
        attempts: 3,
        status: "working on current task",
        retryHistory: ["approach A", "approach B", "approach C"],
      };
      const lines = [
        "🤖 Autonomous Mode Status",
        "",
        `Current Phase: ${status.currentPhase}`,
        `Status: ${status.status}`,
        `Attempts: ${status.attempts}`,
        "",
        "Recent Attempts:",
        ...status.retryHistory.map((attempt, i) => `  ${i + 1}. ${attempt}`),
        "",
        "Use Shift+Tab or /steer to control",
      ];
      ctx.ui.notify(lines.join("\n"), "info");
    },
  });
}

View file

@ -0,0 +1,360 @@
/**
* Steerable Autonomous Mode - Interactive Control Panel
*
* Provides Shift+Tab interface for steering and asking questions
* during autonomous execution, similar to Copilot Auto.
* Also integrates Ctrl+Y for YOLO mode (bypass git prompts).
*/
import { createInterface } from "node:readline";
import { getEditorKeybindings } from "@singularity-forge/pi-tui";
// ─── Constants ──────────────────────────────────────────────────────────────
const PANEL_WIDTH = 60;
const PANEL_HEIGHT = 12;
// Panel menu definition. Keys MUST be unique across all categories:
// handleKeyPress picks the first matching item, so a duplicate key makes the
// later action unreachable (the original bound both "Are you stuck?" and
// "Reassess" to "r", shadowing reassess; reassess now uses "x").
const CONTROL_CATEGORIES = [
  {
    name: "🎯 Steering",
    items: [
      { key: "1", label: "Focus on research", action: "focus_research" },
      { key: "2", label: "Focus on planning", action: "focus_plan" },
      { key: "3", label: "Focus on implementation", action: "focus_build" },
      { key: "4", label: "Speed up execution", action: "speed_up" },
      { key: "5", label: "Slow down execution", action: "slow_down" },
    ],
  },
  {
    name: "❓ Ask Questions",
    items: [
      { key: "q", label: "What are you working on?", action: "ask_status" },
      { key: "w", label: "Why this approach?", action: "ask_reasoning" },
      { key: "e", label: "What's next?", action: "ask_next" },
      { key: "r", label: "Are you stuck?", action: "ask_stuck" },
      { key: "t", label: "Explain your plan", action: "ask_plan" },
    ],
  },
  {
    name: "🔄 Retry Status",
    items: [
      {
        key: "a",
        label: "What attempts have been tried?",
        action: "ask_attempts",
      },
      {
        key: "z",
        label: "Why give up? What blockers?",
        action: "ask_blockers",
      },
      { key: "x", label: "Reassess and try new approach", action: "reassess" },
    ],
  },
  {
    name: "⚡ Quick Controls",
    items: [
      { key: "p", label: "Pause autonomous", action: "pause" },
      { key: "s", label: "Stop execution", action: "stop" },
      { key: "y", label: "YOLO mode (Ctrl+Y)", action: "yolo" },
      { key: "h", label: "Help/commands", action: "help" },
      { key: "esc", label: "Close panel", action: "close" },
    ],
  },
];
// ─── UI Rendering ─────────────────────────────────────────────────────────────
/**
 * Render `lines` inside a box-drawing frame of fixed PANEL_WIDTH with the
 * title embedded in the top border.
 *
 * Fix: the original emitted content rows without `│` side borders and two
 * columns narrower than the borders, so the "box" rendered as loose text
 * between two unconnected horizontal rules.
 */
function renderBox(lines, title = "") {
  const width = PANEL_WIDTH;
  const horizontalBorder = "─".repeat(width - 2);
  // Slice keeps the top border's total width constant regardless of title.
  let result = `┌─${title} ${horizontalBorder.slice(title.length + 1)}─┐\n`;
  for (const line of lines) {
    const padded = line.padEnd(width - 2, " ");
    result += `│ ${padded} │\n`;
  }
  result += `└─${horizontalBorder}─┘\n`;
  return result;
}
/**
 * Render one category as a bold (ANSI SGR 1) header line followed by one
 * indented "KEY. label" row per item; "esc" is shown as "Esc".
 */
function renderCategory(category) {
  const header = `\x1b[1m${category.name}\x1b[0m`;
  const rows = category.items.map((item) => {
    const keyDisplay = item.key === "esc" ? "Esc" : item.key.toUpperCase();
    return `  ${keyDisplay}. ${item.label}`;
  });
  return [header, ...rows];
}
/**
 * Assemble the full panel: optional status line, every control category with
 * blank-line spacing between them, and a dim footer hint — boxed by renderBox.
 */
function renderPanel(currentStatus = "") {
  const content = [];
  if (currentStatus) {
    content.push(`🤖 ${currentStatus}`, "");
  }
  CONTROL_CATEGORIES.forEach((category, index) => {
    content.push(...renderCategory(category));
    // Spacer between categories, but not after the last one.
    if (index < CONTROL_CATEGORIES.length - 1) {
      content.push("");
    }
  });
  content.push(
    "",
    "\x1b[90mShift+Tab or / to open/close • Ctrl+Y for YOLO\x1b[0m",
  );
  return renderBox(content, "🎛️ Steerable Autonomous Mode");
}
// ─── Action Handlers ────────────────────────────────────────────────────────────
// Map of panel action id → async handler(ctx). Most handlers are placeholders
// that only notify; the trailing "Would ..." comments mark where real
// autonomous-runtime integration should eventually be wired in. Keys must
// match the `action` fields declared in CONTROL_CATEGORIES.
const ACTION_HANDLERS = {
  focus_research: async (ctx) => {
    ctx.ui.notify("🎯 Focusing on research phase", "info");
    // Would set autonomous mode to prioritize research
  },
  focus_plan: async (ctx) => {
    ctx.ui.notify("🎯 Focusing on planning phase", "info");
    // Would set autonomous mode to prioritize planning
  },
  focus_build: async (ctx) => {
    ctx.ui.notify("🎯 Focusing on implementation phase", "info");
    // Would set autonomous mode to prioritize building
  },
  speed_up: async (ctx) => {
    ctx.ui.notify("⚡ Execution speed increased", "info");
    // Would adjust autonomous execution speed
  },
  slow_down: async (ctx) => {
    ctx.ui.notify("🐌 Execution speed decreased", "info");
    // Would adjust autonomous execution speed
  },
  ask_status: async (ctx) => {
    ctx.ui.notify("🤖 I'm currently working on [current task]", "info");
    // Would provide current status via AI response
  },
  ask_reasoning: async (ctx) => {
    ctx.ui.notify("🤖 I chose this approach because...", "info");
    // Would provide reasoning via AI response
  },
  ask_next: async (ctx) => {
    ctx.ui.notify("🤖 Next I'll [next step]", "info");
    // Would provide next steps via AI response
  },
  ask_stuck: async (ctx) => {
    ctx.ui.notify("🤖 I'm not stuck, but here's my status...", "info");
    // Would provide stuck status via AI response
  },
  ask_plan: async (ctx) => {
    ctx.ui.notify("🤖 My plan is: [detailed plan]", "info");
    // Would provide plan explanation via AI response
  },
  pause: async (ctx) => {
    ctx.ui.notify("⏸️ Autonomous mode paused", "info");
    // Would pause autonomous execution
  },
  yolo: async (ctx) => {
    // Toggle YOLO mode - integrate with existing SafeGit system
    if (ctx.settingsManager && ctx.settingsManager.toggleYOLO) {
      const enabled = ctx.settingsManager.toggleYOLO();
      ctx.ui.notify(
        `🚀 YOLO mode ${enabled ? "ON" : "OFF"} - safe-git prompts ${enabled ? "disabled" : "enabled"}`,
        enabled ? "success" : "info",
      );
    } else {
      ctx.ui.notify(
        "🚀 YOLO mode - safe-git prompts disabled for this session",
        "success",
      );
    }
  },
  help: async (ctx) => {
    // Show help about the steerable mode
    const helpText = renderPanel("Available controls shown above");
    ctx.ui.notify("Steerable Autonomous Mode Help\n\n" + helpText, "info");
  },
  ask_attempts: async (ctx) => {
    ctx.ui.notify(
      "🤖 I've tried multiple approaches: [list of attempts]",
      "info",
    );
    // Would provide list of attempted approaches
  },
  ask_blockers: async (ctx) => {
    ctx.ui.notify("🤖 Main blockers: [list of current blockers]", "info");
    // Would explain why it's giving up
  },
  reassess: async (ctx) => {
    ctx.ui.notify("🔄 Reassessing - trying new approaches", "info");
    // Would trigger immediate reassessment
  },
  close: async (ctx) => {
    // Just hide the panel; SteerableAutonomousPanel.handleKeyPress skips the
    // re-render for this action.
  },
};
// ─── Panel Controller ──────────────────────────────────────────────────────────
// Interactive panel controller: draws the control menu over the terminal and
// routes single-key presses to ACTION_HANDLERS.
//
// Fix: show() attached a keypress listener to the shared process.stdin that
// hide() never removed, leaking one listener (and a live handler on a dead
// panel) per show()/hide() cycle. The listener is now kept on the instance
// and detached in hide().
export class SteerableAutonomousPanel {
  constructor(ctx) {
    this.ctx = ctx;
    this.isVisible = false;
    this.rl = null;
    // Bound keypress listener so hide() can detach exactly what show() attached.
    this.onKeypress = null;
  }
  async show() {
    if (this.isVisible) return;
    this.isVisible = true;
    this.rl = createInterface({
      input: process.stdin,
      output: process.stdout,
      terminal: true,
    });
    // Hide cursor while panel is open
    process.stdout.write("\x1b[?25l");
    // Render panel
    this.render();
    // Set up key listener (kept on the instance for later removal).
    this.onKeypress = (_str, key) => {
      this.handleKeyPress(key);
    };
    this.rl.input.on("keypress", this.onKeypress);
  }
  hide() {
    if (!this.isVisible) return;
    this.isVisible = false;
    // Restore cursor
    process.stdout.write("\x1b[?25h");
    // Clear the panel area: move up over the panel, then clear to screen end.
    process.stdout.write("\x1b[" + PANEL_HEIGHT + "F");
    process.stdout.write("\x1b[0J");
    if (this.rl) {
      if (this.onKeypress) {
        this.rl.input.removeListener("keypress", this.onKeypress);
        this.onKeypress = null;
      }
      this.rl.close();
      this.rl = null;
    }
  }
  async render() {
    if (!this.isVisible) return;
    // Get current autonomous status (would come from actual system)
    const currentStatus = "Working on current milestone...";
    const panel = renderPanel(currentStatus);
    // Draw at the top-left, then restore the caller's cursor position.
    process.stdout.write("\x1b[s"); // Save current position
    process.stdout.write("\x1b[H"); // Move to top-left
    process.stdout.write(panel);
    process.stdout.write("\x1b[u"); // Restore saved position
  }
  async handleKeyPress(key) {
    if (!this.isVisible) return;
    // Guard: some keypress events deliver an undefined key object.
    if (!key) return;
    if (key.name === "escape") {
      this.hide();
      return;
    }
    // Normalize to the single-character keys used in CONTROL_CATEGORIES.
    let actionKey = key.name || key.sequence?.toLowerCase() || "";
    if (actionKey.length === 1) {
      actionKey = actionKey.toLowerCase();
    }
    // First matching item across categories wins (keys are unique by contract).
    let action = null;
    for (const category of CONTROL_CATEGORIES) {
      const match = category.items.find((item) => item.key === actionKey);
      if (match) {
        action = match;
        break;
      }
    }
    if (!action) return;
    await ACTION_HANDLERS[action.action](this.ctx);
    // Re-render after every action except close (which tears the panel down).
    if (action.action !== "close") {
      this.render();
    }
  }
}
// ─── Integration Hook ──────────────────────────────────────────────────────────
let activePanel = null;
/** Show the singleton panel, replacing any panel that is already live. */
export async function showSteerablePanel(ctx) {
  activePanel?.hide();
  activePanel = new SteerableAutonomousPanel(ctx);
  await activePanel.show();
}
/** Hide and drop the singleton panel, if one is live. */
export async function hideSteerablePanel() {
  if (!activePanel) return;
  activePanel.hide();
  activePanel = null;
}
// ─── Keyboard Integration (would integrate with TUI's key handler) ──────────
/**
 * TUI key-handler hook: returns true (key consumed) only for Shift+Tab;
 * every other key falls through to the default handler.
 */
export function handleSteerableModeKey(key) {
  const isShiftTab = Boolean(key.shift) && key.name === "tab";
  return isShiftTab;
}
// Convenience bundle for callers that prefer one namespaced default import
// over the individual named exports.
export default {
  show: showSteerablePanel,
  hide: hideSteerablePanel,
  handleKey: handleSteerableModeKey,
};

View file

@ -4,7 +4,16 @@ mode:
always_use_skills: []
prefer_skills: []
avoid_skills: []
skill_rules: []
skill_rules:
- when: writing or editing docs, plans, records, handoffs, PR text, or other human-readable prose
use:
- human-writing
- when: building repo orientation, architecture maps, generated wiki, subsystem inventory, or durable codebase context
use:
- sf-wiki
- when: optimizing a measurable metric through experiments, benchmarks, performance work, bundle size, test speed, or model quality
use:
- autoresearch
custom_instructions: []
models: {}
skill_discovery:

View file

@ -13,6 +13,7 @@ import {
closeDatabase,
getAllMilestones,
getSliceTasks,
isDbAvailable,
insertMilestone,
insertSlice,
insertTask,
@ -24,6 +25,10 @@ const tmpDirs = [];
afterEach(() => {
closeDatabase();
if (!isDbAvailable()) {
openDatabase(":memory:");
closeDatabase();
}
invalidateStateCache();
while (tmpDirs.length > 0) {
const dir = tmpDirs.pop();
@ -239,3 +244,34 @@ test("deriveState_when_task_summary_exists_keeps_db_task_status_authoritative",
assert.equal(firstTask.id, "T02");
assert.equal(firstTask.status, "pending");
});
// A project with legacy milestone markdown but a failed DB open must derive a
// hard-blocked state pointing at `sf recover` / `sf migrate`, never silently
// fall back to markdown authority.
test("deriveState_when_db_bootstrap_failed_refuses_legacy_markdown_runtime_fallback", async () => {
  const project = mkdtempSync(join(tmpdir(), "sf-db-runtime-state-"));
  tmpDirs.push(project);
  const milestoneDir = join(project, ".sf", "milestones", "M780");
  mkdirSync(milestoneDir, { recursive: true });
  writeFileSync(
    join(milestoneDir, "M780-ROADMAP.md"),
    [
      "# M780: legacy roadmap only",
      "",
      "## Slice Overview",
      "| ID | Slice | Risk | Depends | Done | After this |",
      "|----|-------|------|---------|------|------------|",
      "| S01 | Should require recovery | low | - | | migrate first |",
      "",
    ].join("\n"),
  );
  // Opening a directory path is expected to fail; the attempt itself marks
  // DB bootstrap as tried, which is the precondition under test.
  try {
    openDatabase(join(project, ".sf"));
  } catch {}
  const state = await deriveState(project);
  assert.equal(state.phase, "blocked");
  assert.equal(state.activeMilestone, null);
  assert.match(state.blockers[0], /does not fall back to markdown authority/i);
  assert.match(state.nextAction, /sf recover/i);
  assert.match(state.nextAction, /sf migrate/i);
});

View file

@ -2,7 +2,7 @@ import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { test } from "vitest";
import guardrails from "../../guardrails/index.js";
import {
DIRECT_SF_COMMAND_NAMES,
getSfTopLevelCommandCompletions,
@ -81,3 +81,27 @@ test("human_facing_cli_help_when_describing_sf_surfaces_uses_direct_commands", (
);
}
});
// Guardrails must register each git-safety surface as a slash command whose
// description advertises the /name form.
test("guardrails_registers_safegit_as_slash_command_surface", () => {
  const commands = new Map();
  // Minimal fake of the extension API: capture registrations, ignore events.
  const pi = {
    registerCommand(name, options) {
      commands.set(name, options);
    },
    on() {},
  };
  guardrails(pi);
  for (const commandName of [
    "safegit",
    "safegit-level",
    "safegit-status",
    "yolo",
  ]) {
    const command = commands.get(commandName);
    assert.equal(typeof command?.handler, "function");
    assert.match(command.description, new RegExp(`/${commandName}\\b`));
    assert.match(command.description, /Slash command/);
  }
});

View file

@ -1,14 +1,20 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import {
getMetricsSystemStats,
getMetricsText,
getSystemPerformanceDashboard,
initMetricsCentral,
queryMetrics,
recordCost,
recordCounter,
recordDatabaseOperation,
recordGauge,
recordHistogram,
recordModelRequest,
recordToolExecution,
registerMetricMeta,
stopMetricsCentral,
updateResourceGauges,
} from "../metrics-central.js";
describe("metrics-central", () => {
@ -119,4 +125,70 @@ describe("metrics-central", () => {
const results = queryMetrics(null, "sess-1", "sf_test");
expect(results).toEqual([]);
});
it("dashboard_reads_counter_gauge_and_histogram_values", () => {
recordCost("unit-dashboard", "model-dashboard", 10, 20, 0.5, "build");
recordToolExecution("read", 30);
recordModelRequest("model-dashboard", 40);
recordDatabaseOperation("select", 50);
updateResourceGauges({
activeSessions: 1,
activeAgents: 2,
concurrentToolCalls: 3,
});
const dashboard = getSystemPerformanceDashboard();
expect(dashboard.cost).toBeGreaterThanOrEqual(0.5);
expect(dashboard.tokens.input).toBeGreaterThanOrEqual(10);
expect(dashboard.tokens.output).toBeGreaterThanOrEqual(20);
expect(dashboard.performance.averageToolExecution).toBeGreaterThanOrEqual(
30,
);
expect(dashboard.performance.averageModelRequest).toBeGreaterThanOrEqual(
40,
);
expect(dashboard.performance.averageDatabaseQuery).toBeGreaterThanOrEqual(
50,
);
expect(dashboard.resources.activeSessions).toBe(1);
expect(dashboard.resources.activeAgents).toBe(2);
expect(dashboard.resources.concurrentToolCalls).toBe(3);
expect(getMetricsSystemStats().databaseStatus).toBe("disconnected");
});
it("stopMetricsCentral_persists_metrics_to_db_adapter", () => {
const rows = [];
const db = {
exec() {},
prepare(sql) {
if (sql.startsWith("INSERT")) {
return {
run(name, type, labels, value, timestamp, sessionId) {
rows.push({ name, type, labels, value, timestamp, sessionId });
},
};
}
throw new Error(`unexpected SQL: ${sql}`);
},
};
initMetricsCentral("/tmp/test-project", {
dbAdapter: db,
sessionId: "sess-db",
});
recordCounter("sf_test_db_counter", { label: "a=b,c" }, 2);
stopMetricsCentral();
expect(rows).toEqual(
expect.arrayContaining([
expect.objectContaining({
name: "sf_test_db_counter",
type: "counter",
labels: JSON.stringify({ label: "a=b,c", session_id: "sess-db" }),
value: 2,
sessionId: "sess-db",
}),
]),
);
});
});

View file

@ -0,0 +1,9 @@
import { describe, expect, it } from "vitest";
// Smoke test: importing the hook-bootstrap module must succeed without a live
// pi runtime and must expose both entry points as functions.
describe("SF hook bootstrap", () => {
  it("register_hooks_module_imports_without_touching_runtime_pi", async () => {
    const mod = await import("../bootstrap/register-hooks.js");
    expect(typeof mod.registerHooks).toBe("function");
    expect(typeof mod.runAgentEndMemoryBackfill).toBe("function");
  });
});

View file

@ -5,7 +5,13 @@
* automatically when later backfills depend on newly introduced columns.
*/
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import {
existsSync,
mkdirSync,
mkdtempSync,
readdirSync,
rmSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { DatabaseSync } from "node:sqlite";
@ -217,7 +223,7 @@ test("openDatabase_migrates_v27_tasks_without_created_at_through_spec_backfill",
const version = db
.prepare("SELECT MAX(version) AS version FROM schema_version")
.get();
assert.equal(version.version, 45);
assert.equal(version.version, 49);
const taskSpec = db
.prepare(
"SELECT milestone_id, slice_id, task_id, verify FROM task_specs WHERE task_id = 'T01'",
@ -257,6 +263,24 @@ test("openDatabase_when_fresh_db_supports_schedule_entries", () => {
assert.deepEqual(rows[0].payload, { message: "check DB schedule" });
});
// File-backed opens must leave exactly one timestamped snapshot plus a
// maintenance marker under .sf/backups/db.
test("openDatabase_when_file_backed_creates_db_snapshot_and_maintenance_marker", () => {
  const dir = mkdtempSync(join(tmpdir(), "sf-db-backup-"));
  tmpDirs.push(dir);
  const sfDir = join(dir, ".sf");
  mkdirSync(sfDir, { recursive: true });
  const dbPath = join(sfDir, "sf.db");
  assert.equal(openDatabase(dbPath), true);
  closeDatabase();
  const backupDir = join(sfDir, "backups", "db");
  // Snapshots are named with the db filename prefix, e.g. "sf.db.<stamp>".
  const backups = readdirSync(backupDir).filter((name) =>
    name.startsWith("sf.db."),
  );
  assert.equal(backups.length, 1);
  assert.equal(existsSync(join(backupDir, "maintenance.json")), true);
});
test("openDatabase_when_fresh_db_supports_gate_run_micro_usd", () => {
assert.equal(openDatabase(":memory:"), true);

View file

@ -16,6 +16,7 @@ import {
import {
getModelInvocableSkills,
getPermittedSkills,
getUserInvocableSkills,
loadSkills,
} from "../skills/loader.js";
@ -250,4 +251,32 @@ describe("skill loading", () => {
expect(buildSkills.some((s) => s.name === "review-skill")).toBe(false);
expect(buildSkills.some((s) => s.name === "user-only")).toBe(false);
});
test("getUserInvocableSkills_shows_bundled_runtime_skills_only", () => {
createSkill("human-facing", { userInvocable: true });
createSkill("autoresearch", { userInvocable: false });
const badDir = join(tmpDir, ".agents", "skills", "droid-evolved");
mkdirSync(badDir, { recursive: true });
writeFileSync(
join(badDir, "SKILL.md"),
`---\nname: droid-evolved\ndescription: Workflow-only skill\nuser-invocable: true\n---\n\n# Invalid\n`,
);
const visible = getUserInvocableSkills([
...loadSkills(tmpDir),
{
name: "bundled-human-writing",
source: "bundled",
valid: true,
userInvocable: true,
},
{
name: "project-forge-command-surface",
source: "project",
valid: true,
userInvocable: true,
},
]);
expect(visible.map((s) => s.name)).toEqual(["bundled-human-writing"]);
});
});

View file

@ -22,6 +22,12 @@ vi.mock("../sf-db.js", () => ({
import * as memoryStore from "../memory-store.js";
import * as sfDb from "../sf-db.js";
// Assert createMemory was called with an object containing at least `fields`;
// extra properties on the recorded payload are allowed.
function expectMemoryCreate(fields) {
  expect(memoryStore.createMemory).toHaveBeenCalledWith(
    expect.objectContaining(fields),
  );
}
describe("UOK Memory Integration", () => {
beforeEach(() => {
vi.clearAllMocks();
@ -36,11 +42,14 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, status, result);
expect(memoryStore.createMemory).toHaveBeenCalledWith(
"pattern",
"Unit type 'execute-task' succeeded with outcome: all tests passed",
0.9,
);
expectMemoryCreate({
category: "pattern",
content:
"Unit type 'execute-task' succeeded with outcome: all tests passed",
confidence: 0.9,
source_unit_type: "execute-task",
source_unit_id: "M001-S01-T01",
});
});
it("records_failed_unit_completion_as_pattern", async () => {
@ -50,11 +59,14 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, status, result);
expect(memoryStore.createMemory).toHaveBeenCalledWith(
"pattern",
"Unit type 'research-slice' failed with status: failed (timeout after 5 minutes)",
0.5,
);
expectMemoryCreate({
category: "pattern",
content:
"Unit type 'research-slice' failed with status: failed (timeout after 5 minutes)",
confidence: 0.5,
source_unit_type: "research-slice",
source_unit_id: "M001-S02",
});
});
it("uses_lower_confidence_for_failures", async () => {
@ -66,8 +78,10 @@ describe("UOK Memory Integration", () => {
await successCall();
await failureCall();
const successConfidence = memoryStore.createMemory.mock.calls[0][2];
const failureConfidence = memoryStore.createMemory.mock.calls[1][2];
const successConfidence =
memoryStore.createMemory.mock.calls[0][0].confidence;
const failureConfidence =
memoryStore.createMemory.mock.calls[1][0].confidence;
expect(successConfidence).toBe(0.9);
expect(failureConfidence).toBe(0.5);
@ -105,11 +119,11 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, status, result);
expect(memoryStore.createMemory).toHaveBeenCalledWith(
"pattern",
expect.stringContaining("blocked"),
0.5,
);
expectMemoryCreate({
category: "pattern",
content: expect.stringContaining("blocked"),
confidence: 0.5,
});
});
it("handles_stale_status_as_failure", async () => {
@ -119,11 +133,11 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, status, result);
expect(memoryStore.createMemory).toHaveBeenCalledWith(
"pattern",
expect.stringContaining("stale"),
0.5,
);
expectMemoryCreate({
category: "pattern",
content: expect.stringContaining("stale"),
confidence: 0.5,
});
});
it("extracts_unitType_from_unitType_property", async () => {
@ -132,11 +146,12 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, status, {});
expect(memoryStore.createMemory).toHaveBeenCalledWith(
"pattern",
expect.stringContaining("plan-milestone"),
0.9,
);
expectMemoryCreate({
category: "pattern",
content: expect.stringContaining("plan-milestone"),
confidence: 0.9,
source_unit_type: "plan-milestone",
});
});
it("defaults_to_unknown_if_no_type_provided", async () => {
@ -145,11 +160,12 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, status, {});
expect(memoryStore.createMemory).toHaveBeenCalledWith(
"pattern",
expect.stringContaining("unknown"),
0.9,
);
expectMemoryCreate({
category: "pattern",
content: expect.stringContaining("unknown"),
confidence: 0.9,
source_unit_type: "unknown",
});
});
it("uses_status_as_outcome_when_result_outcome_missing", async () => {
@ -159,11 +175,11 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, status, result);
expect(memoryStore.createMemory).toHaveBeenCalledWith(
"pattern",
"Unit type 'test-unit' succeeded with outcome: completed",
0.9,
);
expectMemoryCreate({
category: "pattern",
content: "Unit type 'test-unit' succeeded with outcome: completed",
confidence: 0.9,
});
});
it("categorizes_all_patterns_as_pattern_category", async () => {
@ -174,7 +190,7 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, "blocked", {});
for (const call of memoryStore.createMemory.mock.calls) {
expect(call[0]).toBe("pattern");
expect(call[0].category).toBe("pattern");
}
});
@ -186,8 +202,8 @@ describe("UOK Memory Integration", () => {
});
await recordUnitOutcomeInMemory(unit, "failed", { error: "auth failed" });
const call1 = memoryStore.createMemory.mock.calls[0][1];
const call2 = memoryStore.createMemory.mock.calls[1][1];
const call1 = memoryStore.createMemory.mock.calls[0][0].content;
const call2 = memoryStore.createMemory.mock.calls[1][0].content;
expect(call1).toContain("timeout");
expect(call2).toContain("auth failed");
@ -210,7 +226,7 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, "completed", {});
const categoryArg = memoryStore.createMemory.mock.calls[0][0];
const categoryArg = memoryStore.createMemory.mock.calls[0][0].category;
expect(categoryArg).toBe("pattern");
});
@ -220,7 +236,7 @@ describe("UOK Memory Integration", () => {
await recordUnitOutcomeInMemory(unit, "completed", result);
const pattern = memoryStore.createMemory.mock.calls[0][1];
const pattern = memoryStore.createMemory.mock.calls[0][0].content;
expect(pattern).toMatch(/code-review/);
expect(pattern).toMatch(/succeeded/);
expect(pattern).toMatch(/3 files reviewed/);
@ -260,7 +276,7 @@ describe("UOK Memory Integration", () => {
outcome: "very specific outcome",
});
const pattern = memoryStore.createMemory.mock.calls[0][1];
const pattern = memoryStore.createMemory.mock.calls[0][0].content;
expect(pattern).toContain("specific-task-type");
});
});

View file

@ -271,6 +271,58 @@ test("writeUnitRuntimeRecord_merges_updates", () => {
assert.equal(record.progressCount, 2);
});
test("writeUnitRuntimeRecord_when_lineage_event_started_persists_current_worker", () => {
  const projectRoot = makeProject();
  // A "started" lineage event should register the worker as current and
  // append it to the lineage history.
  const lineageEvent = {
    status: "started",
    workerSessionId: "worker-1",
    spawnId: "spawn-1",
    ts: "2026-05-08T00:00:00.000Z",
  };
  const record = writeUnitRuntimeRecord(
    projectRoot,
    "execute-task",
    "M001/S01/T01",
    Date.now(),
    { status: "running", lineageEvent },
  );
  const { lineage } = record;
  assert.equal(lineage.status, "started");
  assert.equal(lineage.currentWorkerSessionId, "worker-1");
  assert.deepEqual(lineage.workerSessionIds, ["worker-1"]);
  assert.equal(lineage.events[0].spawnId, "spawn-1");
});
test("writeUnitRuntimeRecord_when_lineage_event_completed_clears_current_worker", () => {
  const projectRoot = makeProject();
  const startedAt = Date.now();
  // First transition: worker-1 begins running the unit.
  writeUnitRuntimeRecord(projectRoot, "execute-task", "M001/S01/T01", startedAt, {
    status: "running",
    lineageEvent: { status: "started", workerSessionId: "worker-1" },
  });
  // Second transition: the same worker completes the unit; the record should
  // clear the current worker and remember it as the completing worker.
  const record = writeUnitRuntimeRecord(
    projectRoot,
    "execute-task",
    "M001/S01/T01",
    startedAt,
    {
      status: "completed",
      lineageEvent: { status: "completed", workerSessionId: "worker-1" },
    },
  );
  const { lineage } = record;
  assert.equal(lineage.status, "completed");
  assert.equal(lineage.currentWorkerSessionId, null);
  assert.equal(lineage.completedWorkerSessionId, "worker-1");
  assert.equal(lineage.events.length, 2);
});
test("writeUnitRuntimeRecord_sanitizes_path_characters", () => {
const root = makeProject();
writeUnitRuntimeRecord(root, "exec/task", "M001/S01/T01", Date.now(), {

View file

@ -23,6 +23,11 @@ export {
USER_SKILL_DIR,
validateSkillFrontmatter,
} from "../skills/index.js";
export {
assessAssertionCoverage,
fulfilledAssertionIdsFromHandoff,
requiredAssertionIdsFromContract,
} from "./assertion-coverage.js";
// ─── Audit & Observability ─────────────────────────────────────────────────
export { buildAuditEnvelope, emitUokAuditEvent } from "./audit.js";
export {
@ -32,6 +37,11 @@ export {
// ─── Gates ─────────────────────────────────────────────────────────────────
export { ChaosMonkey, ChaosMonkeyGate } from "./chaos-monkey.js";
// ─── Model Policy ──────────────────────────────────────────────────────────
export {
buildWorkerContextPackProjection,
validateWorkerContextPackProjection,
} from "./context-pack-projection.js";
// ─── Contracts & Types ────────────────────────────────────────────────────
export { validateGate } from "./contracts.js";
// ─── Coordination Store ───────────────────────────────────────────────────
@ -48,7 +58,6 @@ export {
} from "./diagnostic-synthesis.js";
// ─── Dispatch Envelope ─────────────────────────────────────────────────────
export { buildDispatchEnvelope, explainDispatch } from "./dispatch-envelope.js";
// ─── Execution Graph ───────────────────────────────────────────────────────
export {
buildExecutionGraphSnapshot,
@ -92,8 +101,25 @@ export {
readUokMetrics,
writeUokMetrics,
} from "./metrics-exposition.js";
// ─── Model Policy ──────────────────────────────────────────────────────────
export { applyModelPolicyFilter } from "./model-policy.js";
export {
DEFAULT_MODEL_ROLE_CONSTRAINTS,
ModelRolePolicyValidationError,
normalizeRolePolicies,
normalizeRolePolicy,
SUPPORTED_MODEL_ROLE_CONSTRAINTS,
SUPPORTED_MODEL_ROLES,
validateRolePolicy,
} from "./model-role-policy.js";
export {
emitModelAutoResolvedEvent,
modelRoleForUnitType,
} from "./model-route-evidence.js";
export {
buildModelRouteSnapshot,
redactModelConfigSecrets,
sanitizeModelRouteSnapshot,
} from "./model-route-snapshot.js";
export { MultiPackageGate } from "./multi-package-gate.js";
export { OutcomeLearningGate } from "./outcome-learning-gate.js";
export { signalKernelEnter as signalParityEnter } from "./parity-diff-capture.js";
@ -119,6 +145,11 @@ export {
isExecutionEntryPhase,
isMissingFinalizedContextResult,
} from "./plan-v2.js";
export {
buildUokProgressEvent,
UOK_PROGRESS_EVENT_TYPES,
validateUokProgressEvent,
} from "./progress-event.js";
// ─── Scheduler v2 (Background Work) ────────────────────────────────────────
export {
CancellationToken,
@ -138,6 +169,16 @@ export {
TASK_TERMINAL_STATES,
unitRuntimeToTaskState,
} from "./task-state.js";
export {
normalizeCommandRegistry,
normalizeToolCommandRegistry,
validateCommandRegistry,
validateToolCommandRegistry,
} from "./tool-command-registry.js";
export {
normalizeUnitLineage,
recordUnitLineageEvent,
} from "./unit-lineage.js";
// ─── Unit Runtime ──────────────────────────────────────────────────────────
export {
clearUnitRuntimeRecord,

View file

@ -23,6 +23,10 @@ import {
} from "../paths.js";
import { getSlice, isDbAvailable } from "../sf-db.js";
import { parseUnitId } from "../unit-id.js";
import {
normalizeUnitLineage,
recordUnitLineageEvent,
} from "./unit-lineage.js";
/**
* Lists every unit runtime projection status in UOK lifecycle order.
*
@ -177,7 +181,13 @@ export async function recordUnitOutcomeInMemory(unit, status, result) {
? `Unit type '${unitType}' succeeded with outcome: ${outcome}`
: `Unit type '${unitType}' failed with status: ${status} (${result?.error || "no error info"})`;
await createMemory("pattern", pattern, confidence);
await createMemory({
category: "pattern",
content: pattern,
confidence,
source_unit_type: unitType,
source_unit_id: unit.id ?? unit.unitId ?? null,
});
} catch (_err) {
// Degrade gracefully - memory failures do not block UOK
}
@ -363,6 +373,15 @@ export function writeUnitRuntimeRecord(
: hasUpdate(updates, "recoveryAttempts")
? (updates.recoveryAttempts ?? 0)
: (prev?.retryCount ?? recoveryAttempts ?? 0);
const lineage = updates.lineageEvent
? recordUnitLineageEvent(prev?.lineage ?? { unitType, unitId }, {
unitType,
unitId,
...updates.lineageEvent,
})
: updates.lineage
? normalizeUnitLineage({ unitType, unitId, ...updates.lineage })
: prev?.lineage;
const next = {
version: 1,
unitType,
@ -407,6 +426,7 @@ export function writeUnitRuntimeRecord(
DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
lastRecoveryReason: updates.lastRecoveryReason ?? prev?.lastRecoveryReason,
runawayGuardPause: updates.runawayGuardPause ?? prev?.runawayGuardPause,
...(lineage ? { lineage } : {}),
};
writeFileSync(path, JSON.stringify(next, null, 2) + "\n", "utf-8");
_runtimeCache.set(path, next);

View file

@ -0,0 +1,59 @@
---
name: autoresearch
description: Run an SF-native metric optimization loop. Use inside autonomous workflow when a task has a measurable target such as runtime, bundle size, test duration, model quality, or benchmark score.
---
# Autoresearch
Use this skill when SF should improve a measurable metric by trying one focused
change at a time, measuring it, keeping wins, and discarding regressions.
This is a workflow skill. Do not turn it into a user-facing guide. Produce small
state files that another SF unit can resume from.
## Required Inputs
- Goal: what metric should improve.
- Command: how to measure it.
- Direction: `lower` or `higher`.
- Scope: files or packages allowed to change.
- Constraints: tests, deps, compatibility, time budget, and off-limits paths.
Infer these from repo context when possible. Ask only when the metric or command
is unknowable.
## State Files
Create or maintain these at repo root unless the project already has a better
experiment directory:
- `autoresearch.md`: objective, metric, command, scope, constraints, current
best, and what has been tried.
- `autoresearch.sh`: fast reproducible measurement script. It must print
`METRIC name=value`.
- `autoresearch.jsonl`: one JSON object per experiment.
- `autoresearch.checks.sh`: optional correctness backpressure.
Keep `autoresearch.md` terse. It is a resume surface, not a report.
## Loop
1. Read existing state and establish the current best metric.
2. Choose one hypothesis.
3. Edit only in scope.
4. Run `timeout 600 bash autoresearch.sh`.
5. Run `timeout 300 bash autoresearch.checks.sh` if it exists.
6. Log result as `keep`, `discard`, `crash`, or `checks_failed`.
7. Keep improvements. Revert focused regressions.
8. Update `autoresearch.md` with one line per meaningful lesson.
Stop only when the stated budget or target is reached, the user interrupts, or
the repo is no longer in a safe state to continue.
## SF Integration
- Treat each experiment as one autonomous unit with a single hypothesis.
- Prefer cheap checks before expensive benchmarks.
- Use SF/UOK gates when available for commit and parity safety.
- Do not write long research narratives. Preserve enough evidence to resume and
audit the decision.

View file

@ -0,0 +1,44 @@
---
name: human-writing
description: Write and edit concise human-readable docs, plans, records, PR text, and handoffs. Use when creating or revising documentation or other prose that should be sparse, direct, and low-context.
---
# Human Writing
Use this skill for prose that humans will read later: docs, plans, records,
handoffs, PR notes, and status summaries.
## Default Style
- Keep it sparse. Prefer the shortest version that preserves decisions,
evidence, commands, and next actions.
- Write like an engineer leaving a useful note for another engineer.
- Use concrete nouns and exact file, command, model, endpoint, date, or runtime
names when they matter.
- Remove filler, generic framing, hype, and recap paragraphs that do not change
what the reader can do.
- Preserve uncertainty honestly. Say what is known, what is inferred, and what
still needs verification.
- Prefer bullets for scan-heavy material. Prefer short paragraphs for context or
rationale.
## Docs Context Budget
When editing docs, reduce future context load:
- Keep root docs and agent instructions as routing maps, not full doctrine.
- Move deep detail into narrowly named reference docs only when it will be reused.
- Delete duplicated explanations instead of rephrasing them in multiple places.
- Prefer links to canonical docs over pasted summaries.
- Keep generated or temporary research out of hand-maintained docs unless it has
become a durable decision.
## Rewrite Pass
Before finishing prose, do one compression pass:
1. Delete throat-clearing and obvious statements.
2. Collapse repeated ideas into one canonical sentence.
3. Replace broad claims with observed facts.
4. Keep only examples that prevent likely misuse.
5. End with the current state and the next useful action, if there is one.

View file

@ -0,0 +1,58 @@
---
name: sf-wiki
description: Build a sparse SF-native codebase wiki or subsystem map. Use inside autonomous workflow when SF needs durable repo orientation, architecture maps, or generated reference docs.
---
# SF Wiki
Use this skill to generate compact repository documentation for future agents and
humans. The output is an SF-native codebase map, not a Factory upload.
## Output
Default location:
- `.sf/wiki/` for runtime working context.
- `docs/generated/wiki/` only when the user wants tracked generated docs.
Prefer `.sf/wiki/` for autonomous workflow context so tracked docs stay sparse.
## Survey
Use two passes:
1. Structural: README, AGENTS, package manifests, build/test config, docs,
entrypoints, CI, and top-level directories.
2. Source: routes, commands, providers, services, workflows, tests, generated
artifacts, and feature flags.
Reconcile source-derived topics with directory enumeration. If a non-trivial
directory is skipped, record the reason in one sentence.
## Pages
Keep the wiki small by default:
- `index.md`: what this repo is, how to run it, where to start.
- `architecture.md`: major subsystems and data/control flow.
- `workflows.md`: build, test, release, autonomous/SF flows.
- `subsystems.md`: table of subsystem, path, purpose, owner signal, tests.
- `glossary.md`: project terms only.
Add subsystem pages only when the table would become unreadable.
## Style
- Optimize for context reload, not publication.
- Use bullets and tables.
- Link to canonical docs instead of copying them.
- Avoid prose history unless it changes how someone should work.
- Mark stale or uncertain facts explicitly.
## Verification
Before finishing:
- Check every referenced path exists or is intentionally historical.
- Run cheap repo discovery commands again if files changed during the scan.
- Leave a short freshness note with date, commit, and commands used.

View file

@ -45,6 +45,8 @@ test("buildAutoBootstrapContext includes purpose docs and source inventory", ()
assert.match(context, /ACE spec-first TDD/);
assert.match(context, /explorer-style subagents/);
assert.match(context, /harness-engineering principles/);
assert.match(context, /review\/export or recovery surfaces/);
assert.match(context, /canonical structured runtime state/);
assert.match(context, /## \.sf\/PROJECT\.md/);
assert.match(context, /## VISION\.md/);
assert.match(context, /## TODO\.md/);

View file

@ -1,7 +1,13 @@
import { describe, expect, it, beforeEach, afterEach } from "vitest";
import { mkdtempSync, rmSync, readFileSync, existsSync, readdirSync } from "node:fs";
import {
existsSync,
mkdtempSync,
readdirSync,
readFileSync,
rmSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { configureLogger, getLogger, resetLoggerConfig } from "../logger.js";
describe("logger", () => {
@ -64,7 +70,9 @@ describe("logger", () => {
log.info("key is sk-ant-abc123def456");
// The customSink receives record.message where pattern redaction
// has already been applied (via buildRedactingSink).
expect(logs.some((l) => l.includes("[REDACTED]") && !l.includes("sk-ant-"))).toBe(true);
expect(
logs.some((l) => l.includes("[REDACTED]") && !l.includes("sk-ant-")),
).toBe(true);
});
it("home directory paths are redacted to ~", async () => {
@ -82,7 +90,9 @@ describe("logger", () => {
// The customSink receives record.message where pattern redaction
// has already been applied (via buildRedactingSink).
expect(logs.some((l) => l.includes("~/projects/foo"))).toBe(true);
expect(logs.some((l) => l.includes(home) && !l.includes("~"))).toBe(false);
expect(logs.some((l) => l.includes(home) && !l.includes("~"))).toBe(
false,
);
});
});
@ -150,12 +160,14 @@ describe("logger", () => {
mode: "autonomous",
logDir: tmpDir,
customSink: (record) => {
lines.push(JSON.stringify({
ts: record.timestamp,
level: record.level,
category: record.category,
message: record.message,
}));
lines.push(
JSON.stringify({
ts: record.timestamp,
level: record.level,
category: record.category,
message: record.message,
}),
);
},
});
const log = getLogger("sf.autonomous");