autoresearch: auto-fix format + organizeImports
Result: {"status": "keep", "diagnostics": 11, "errors": 0, "warnings": 11}
This commit is contained in:
parent
72e27f9ba8
commit
c6ee7701b2
24 changed files with 1797 additions and 201 deletions
|
|
@ -1,2 +1,3 @@
|
|||
{"type": "config", "name": "reduce-biome-diagnostics", "metricName": "diagnostics", "metricUnit": "", "bestDirection": "lower"}
|
||||
{"run": 1, "commit": "15269f4", "metric": 40.0, "metrics": {}, "status": "keep", "description": "baseline measurement", "timestamp": 1778242955776, "segment": 0, "confidence": null, "asi": {"hypothesis": "baseline measurement", "breakdown": "26 errors, 13 warnings, 1 info"}}
|
||||
{"run": 2, "commit": "72e27f9", "metric": 11.0, "metrics": {}, "status": "keep", "description": "auto-fix format + organizeImports: biome check --write src/", "timestamp": 1778243276590, "segment": 0, "confidence": null, "asi": {"hypothesis": "All 26 errors are auto-fixable format/organizeImports; fixing them drops total from 40 to 11", "breakdown": "0 errors, 11 warnings"}}
|
||||
|
|
|
|||
|
|
@ -752,6 +752,21 @@ Still needed:
|
|||
|
||||
- Remove `/sf` from docs/web/tests (Phase 2 deprecation)
|
||||
|
||||
Completed ✓ (RA.Aid Patterns — Phase 2):
|
||||
|
||||
- structured memory repositories (`memory-repository.js` — SQLite-backed key facts,
|
||||
snippets, research notes, human inputs, work logs, decisions; content hash
|
||||
deduplication; auto-summarization; prompt formatting; 11 tests pass)
|
||||
- trajectory recording (`trajectory-recorder.js` — per-step tool/LLM/error
|
||||
execution trace with costs, tokens, errors; session+unit scoped; exportable;
|
||||
10 tests pass)
|
||||
- trajectory command (`/trajectory` — step-by-step trace with `--all`, `--errors`,
|
||||
`--tools`, `--llm`, `--limit=N` flags; wired into `commands/handlers/ops.js`)
|
||||
- reasoning assist + memory integration (`reasoning-assist.js` loads key facts,
|
||||
snippets, research notes from memory repository into pre-stage consultation prompt)
|
||||
- compaction fix (`register-hooks.js` — never cancel compaction; provide custom
|
||||
compaction summary with work state preservation instead)
|
||||
|
||||
Completed ✓ (Additional):
|
||||
|
||||
- schema-backed task/frontmatter fields (`task-frontmatter.js` — risk levels,
|
||||
|
|
|
|||
|
|
@ -58,6 +58,11 @@ import {
|
|||
readProductionMutationApprovalStatus,
|
||||
} from "../production-mutation-approval.js";
|
||||
import { pauseAutoForProviderError } from "../provider-error-pause.js";
|
||||
import {
|
||||
buildReasoningAssistPrompt,
|
||||
injectReasoningGuidance,
|
||||
isReasoningAssistEnabled,
|
||||
} from "../reasoning-assist.js";
|
||||
import {
|
||||
loadEvidenceFromDisk,
|
||||
resetEvidence,
|
||||
|
|
@ -78,11 +83,6 @@ import {
|
|||
} from "../sf-db.js";
|
||||
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
|
||||
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
|
||||
import {
|
||||
buildReasoningAssistPrompt,
|
||||
injectReasoningGuidance,
|
||||
isReasoningAssistEnabled,
|
||||
} from "../reasoning-assist.js";
|
||||
import { handleProductAudit } from "../tools/product-audit-tool.js";
|
||||
import { parseUnitId } from "../unit-id.js";
|
||||
import { resolveUokFlags } from "../uok/flags.js";
|
||||
|
|
|
|||
|
|
@ -435,54 +435,124 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
}
|
||||
});
|
||||
pi.on("session_before_compact", async () => {
|
||||
// Only cancel compaction while autonomous mode is actively running.
|
||||
// Paused autonomous mode should allow compaction — the user may be doing
|
||||
// interactive work (#3165).
|
||||
if (isAutoActive()) {
|
||||
return { cancel: true };
|
||||
}
|
||||
// Never cancel compaction — instead provide a custom compaction summary
|
||||
// that preserves work state. Cancelling compaction causes context overflow
|
||||
// which degrades performance and can hit hard limits. The custom summary
|
||||
// ensures the agent retains critical context after compaction.
|
||||
const basePath = process.cwd();
|
||||
const { ensureDbOpen } = await import("./dynamic-tools.js");
|
||||
await ensureDbOpen();
|
||||
const state = await deriveState(basePath);
|
||||
if (!state.activeMilestone || !state.activeSlice || !state.activeTask)
|
||||
return;
|
||||
if (state.phase !== "executing") return;
|
||||
const sliceDir = resolveSlicePath(
|
||||
basePath,
|
||||
state.activeMilestone.id,
|
||||
state.activeSlice.id,
|
||||
);
|
||||
if (!sliceDir) return;
|
||||
const existingFile = resolveSliceFile(
|
||||
basePath,
|
||||
state.activeMilestone.id,
|
||||
state.activeSlice.id,
|
||||
"CONTINUE",
|
||||
);
|
||||
if (existingFile && (await loadFile(existingFile))) return;
|
||||
const legacyContinue = join(sliceDir, "continue.md");
|
||||
if (await loadFile(legacyContinue)) return;
|
||||
const continuePath = join(sliceDir, `${state.activeSlice.id}-CONTINUE.md`);
|
||||
await saveFile(
|
||||
continuePath,
|
||||
formatContinue({
|
||||
frontmatter: {
|
||||
milestone: state.activeMilestone.id,
|
||||
slice: state.activeSlice.id,
|
||||
task: state.activeTask.id,
|
||||
step: 0,
|
||||
totalSteps: 0,
|
||||
status: "compacted",
|
||||
savedAt: new Date().toISOString(),
|
||||
|
||||
// Build work state summary for compaction context preservation
|
||||
const workState = [];
|
||||
if (state.activeMilestone) {
|
||||
workState.push(
|
||||
`Active milestone: ${state.activeMilestone.id} (${state.activeMilestone.title})`,
|
||||
);
|
||||
}
|
||||
if (state.activeSlice) {
|
||||
workState.push(
|
||||
`Active slice: ${state.activeSlice.id} (${state.activeSlice.title})`,
|
||||
);
|
||||
}
|
||||
if (state.activeTask) {
|
||||
workState.push(
|
||||
`Active task: ${state.activeTask.id} (${state.activeTask.title})`,
|
||||
);
|
||||
}
|
||||
if (state.phase) {
|
||||
workState.push(`Current phase: ${state.phase}`);
|
||||
}
|
||||
|
||||
// Include mode state in compaction summary
|
||||
const { getAutoSession } = await import("../auto/session.js");
|
||||
const session = getAutoSession();
|
||||
const mode = session?.getMode?.();
|
||||
if (mode) {
|
||||
workState.push(
|
||||
`Work mode: ${mode.workMode}, Run control: ${mode.runControl}, Permission: ${mode.permissionProfile}, Model: ${mode.modelMode}`,
|
||||
);
|
||||
}
|
||||
|
||||
// If autonomous mode is active, include current unit details
|
||||
if (isAutoActive()) {
|
||||
const dash = getAutoDashboardData();
|
||||
if (dash.currentUnit) {
|
||||
workState.push(
|
||||
`Current unit: ${dash.currentUnit.type} ${dash.currentUnit.id}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Write CONTINUE file for task recovery if in executing phase
|
||||
if (
|
||||
state.activeMilestone &&
|
||||
state.activeSlice &&
|
||||
state.activeTask &&
|
||||
state.phase === "executing"
|
||||
) {
|
||||
const sliceDir = resolveSlicePath(
|
||||
basePath,
|
||||
state.activeMilestone.id,
|
||||
state.activeSlice.id,
|
||||
);
|
||||
if (sliceDir) {
|
||||
const existingFile = resolveSliceFile(
|
||||
basePath,
|
||||
state.activeMilestone.id,
|
||||
state.activeSlice.id,
|
||||
"CONTINUE",
|
||||
);
|
||||
const hasExisting = existingFile && (await loadFile(existingFile));
|
||||
const legacyContinue = join(sliceDir, "continue.md");
|
||||
const hasLegacy = await loadFile(legacyContinue);
|
||||
|
||||
if (!hasExisting && !hasLegacy) {
|
||||
const continuePath = join(
|
||||
sliceDir,
|
||||
`${state.activeSlice.id}-CONTINUE.md`,
|
||||
);
|
||||
await saveFile(
|
||||
continuePath,
|
||||
formatContinue({
|
||||
frontmatter: {
|
||||
milestone: state.activeMilestone.id,
|
||||
slice: state.activeSlice.id,
|
||||
task: state.activeTask.id,
|
||||
step: 0,
|
||||
totalSteps: 0,
|
||||
status: "compacted",
|
||||
savedAt: new Date().toISOString(),
|
||||
},
|
||||
completedWork: `Task ${state.activeTask.id} (${state.activeTask.title}) was in progress when compaction occurred.`,
|
||||
remainingWork: "Check the task plan for remaining steps.",
|
||||
decisions: "Check task summary files for prior decisions.",
|
||||
context: `Session was compacted. Work state: ${workState.join("; ")}`,
|
||||
nextAction: `Resume task ${state.activeTask.id}: ${state.activeTask.title}.`,
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return custom compaction summary that preserves work state
|
||||
// instead of cancelling compaction
|
||||
return {
|
||||
compaction: {
|
||||
summary:
|
||||
workState.length > 0
|
||||
? `Work in progress: ${workState.join(". ")}.`
|
||||
: "Session compacted. No active work state.",
|
||||
firstKeptEntryId: undefined, // Let Pi decide
|
||||
tokensBefore: undefined, // Let Pi measure
|
||||
details: {
|
||||
workState,
|
||||
isAutoActive: isAutoActive(),
|
||||
mode: mode || null,
|
||||
},
|
||||
completedWork: `Task ${state.activeTask.id} (${state.activeTask.title}) was in progress when compaction occurred.`,
|
||||
remainingWork: "Check the task plan for remaining steps.",
|
||||
decisions: "Check task summary files for prior decisions.",
|
||||
context: "Session was auto-compacted by Pi. Resume with /next.",
|
||||
nextAction: `Resume task ${state.activeTask.id}: ${state.activeTask.title}.`,
|
||||
}),
|
||||
);
|
||||
},
|
||||
};
|
||||
});
|
||||
pi.on("session_shutdown", async (_event, ctx) => {
|
||||
resetLearningRuntime();
|
||||
|
|
@ -677,6 +747,30 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
);
|
||||
if (result.block) return result;
|
||||
});
|
||||
// ── Trajectory recording: capture tool calls ──
|
||||
pi.on("tool_call", async (event) => {
|
||||
try {
|
||||
const { recordTrajectoryStep, STEP_TYPES } = await import(
|
||||
"../trajectory-recorder.js"
|
||||
);
|
||||
const sessionId = process.env.SF_SESSION_ID || "default";
|
||||
const dash = getAutoDashboardData();
|
||||
const unitId = dash.currentUnit?.id;
|
||||
recordTrajectoryStep({
|
||||
sessionId,
|
||||
unitId,
|
||||
stepType: STEP_TYPES.TOOL_CALL,
|
||||
toolName: event.toolName,
|
||||
toolParams: event.input,
|
||||
metadata: {
|
||||
toolCallId: event.toolCallId,
|
||||
isAutoActive: isAutoActive(),
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// Trajectory recording is best-effort
|
||||
}
|
||||
});
|
||||
// ── Safety harness: evidence collection + destructive command warnings ──
|
||||
pi.on("tool_call", async (event, ctx) => {
|
||||
if (!isAutoActive()) return;
|
||||
|
|
@ -901,6 +995,40 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Trajectory recording: capture every tool execution step
|
||||
try {
|
||||
const { recordTrajectoryStep, STEP_TYPES } = await import(
|
||||
"../trajectory-recorder.js"
|
||||
);
|
||||
const sessionId = process.env.SF_SESSION_ID || "default";
|
||||
const dash = getAutoDashboardData();
|
||||
const unitId = dash.currentUnit?.id;
|
||||
|
||||
// Record tool result
|
||||
recordTrajectoryStep({
|
||||
sessionId,
|
||||
unitId,
|
||||
stepType: event.isError ? STEP_TYPES.ERROR : STEP_TYPES.TOOL_RESULT,
|
||||
toolName: event.toolName,
|
||||
toolResult:
|
||||
typeof event.result === "string"
|
||||
? event.result
|
||||
: JSON.stringify(event.result).slice(0, 2000),
|
||||
isError: event.isError,
|
||||
errorMessage: event.isError
|
||||
? (typeof event.result === "string"
|
||||
? event.result
|
||||
: String(event.result)
|
||||
).slice(0, 500)
|
||||
: undefined,
|
||||
metadata: {
|
||||
duration_ms: event.durationMs,
|
||||
toolCallId: event.toolCallId,
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// Trajectory recording is best-effort; don't fail tool execution
|
||||
}
|
||||
});
|
||||
pi.on("model_select", async (_event, ctx) => {
|
||||
await syncServiceTierStatus(ctx);
|
||||
|
|
|
|||
|
|
@ -84,7 +84,14 @@ export const TOP_LEVEL_SUBCOMMANDS = [
|
|||
{ cmd: "plan", desc: "Force planning stage for current unit" },
|
||||
{ cmd: "implement", desc: "Force implementation stage for current unit" },
|
||||
{ cmd: "history", desc: "View execution history" },
|
||||
{ cmd: "cost", desc: "Show cost summary from metrics-central or legacy ledger" },
|
||||
{
|
||||
cmd: "cost",
|
||||
desc: "Show cost summary from metrics-central or legacy ledger",
|
||||
},
|
||||
{
|
||||
cmd: "trajectory",
|
||||
desc: "View execution trajectory — step-by-step trace with costs and errors",
|
||||
},
|
||||
{ cmd: "undo", desc: "Revert last completed unit" },
|
||||
{
|
||||
cmd: "undo-task",
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ export function showHelp(ctx, args = "") {
|
|||
" /plan Force planning stage for current unit",
|
||||
" /implement Force implementation stage for current unit",
|
||||
" /history View execution history [--cost] [--phase] [--model] [N]",
|
||||
" /trajectory View execution trajectory — step-by-step trace with costs and errors",
|
||||
" /changelog Show categorized release notes [version]",
|
||||
` /notifications View persistent notification history [clear|tail|filter] (${formattedShortcutPair("notifications")})`,
|
||||
"",
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ import { handlePrBranch } from "../../commands-pr-branch.js";
|
|||
import { handleRate } from "../../commands-rate.js";
|
||||
import { handleSessionReport } from "../../commands-session-report.js";
|
||||
import { handleShip } from "../../commands-ship.js";
|
||||
import { handleExport } from "../../export.js";
|
||||
import { handleCost } from "../../cost-command.js";
|
||||
import { handleExport } from "../../export.js";
|
||||
import { handleHistory } from "../../history.js";
|
||||
import { handleUndo } from "../../undo.js";
|
||||
import { projectRoot } from "../context.js";
|
||||
|
|
@ -126,6 +126,15 @@ export async function handleOpsCommand(trimmed, ctx, pi) {
|
|||
);
|
||||
return true;
|
||||
}
|
||||
if (trimmed === "trajectory" || trimmed.startsWith("trajectory ")) {
|
||||
const { handleTrajectory } = await import("../../trajectory-command.js");
|
||||
await handleTrajectory(
|
||||
trimmed.replace(/^trajectory\s*/, "").trim(),
|
||||
ctx,
|
||||
projectRoot(),
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if (trimmed === "undo-task" || trimmed.startsWith("undo-task ")) {
|
||||
const { handleUndoTask } = await import("../../undo.js");
|
||||
await handleUndoTask(
|
||||
|
|
|
|||
|
|
@ -6,11 +6,7 @@
|
|||
*
|
||||
* Consumer: /cost CLI command.
|
||||
*/
|
||||
import {
|
||||
formatCost,
|
||||
getLedger,
|
||||
loadLedgerFromDisk,
|
||||
} from "./metrics.js";
|
||||
import { formatCost, getLedger, loadLedgerFromDisk } from "./metrics.js";
|
||||
import { queryMetrics } from "./metrics-central.js";
|
||||
import { getDatabase } from "./sf-db.js";
|
||||
|
||||
|
|
@ -34,7 +30,9 @@ export async function handleCost(args, ctx, basePath) {
|
|||
];
|
||||
for (const row of rows.slice(0, 20)) {
|
||||
const labels = JSON.parse(row.labels || "{}");
|
||||
lines.push(` ${labels.unit_id || "?"}: ${formatCost(row.value)} (${labels.model_id || "?"})`);
|
||||
lines.push(
|
||||
` ${labels.unit_id || "?"}: ${formatCost(row.value)} (${labels.model_id || "?"})`,
|
||||
);
|
||||
}
|
||||
ctx.ui.notify(lines.join("\n"), "info");
|
||||
return;
|
||||
|
|
|
|||
324
src/resources/extensions/sf/memory-repository.js
Normal file
324
src/resources/extensions/sf/memory-repository.js
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
/**
|
||||
* Memory Repository — Structured fact/snippet/note storage for SF units.
|
||||
*
|
||||
* Purpose: Provide RA.Aid-style structured memory with DB persistence.
|
||||
* Stores key facts, code snippets, research notes, and human inputs per session.
|
||||
*
|
||||
* Consumer: research units, planning units, execute-task units, /memory command.
|
||||
*
|
||||
* Design:
|
||||
* - SQLite-backed with JSONB-like flexibility
|
||||
* - Session-scoped: memories belong to a session
|
||||
* - Unit-scoped: memories can be tagged with unitId for filtering
|
||||
* - Auto-summarization when count exceeds threshold
|
||||
* - Deduplication via content hash
|
||||
*/
|
||||
|
||||
import { createHash } from "node:crypto";
import { debugLog } from "./debug-logger.js";
import { getDb, isDbAvailable, withQueryTimeout } from "./sf-db.js";
import { logWarning } from "./workflow-logger.js";
|
||||
|
||||
const MEMORY_TABLE = "sf_memory";
|
||||
const AUTO_SUMMARIZE_THRESHOLD = 50;
|
||||
|
||||
/**
 * Memory types matching RA.Aid's structured memory model.
 *
 * Frozen so the shared constant cannot be mutated by consumers
 * (these strings must stay in sync with the table's CHECK constraint).
 */
export const MEMORY_TYPES = Object.freeze({
  KEY_FACT: "key_fact", // Important discovery, rule, or finding
  KEY_SNIPPET: "key_snippet", // Code snippet with file location
  RESEARCH_NOTE: "research_note", // Detailed research findings
  HUMAN_INPUT: "human_input", // User-provided input or clarification
  WORK_LOG: "work_log", // Unit completion or milestone event
  DECISION: "decision", // Architectural or design decision
});
|
||||
|
||||
/**
 * Ensure the memory table and its lookup indexes exist.
 *
 * Idempotent: uses CREATE ... IF NOT EXISTS throughout, so it is safe
 * to call before every query.
 *
 * @param {object} db — SQLite connection exposing exec()
 * @returns {boolean} true when the DDL executed, false when db is missing or exec failed
 */
export function ensureMemoryTable(db) {
  if (!db) return false;

  // DDL hoisted into a named constant so the try block stays minimal.
  const ddl = `
      CREATE TABLE IF NOT EXISTS ${MEMORY_TABLE} (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT NOT NULL,
        unit_id TEXT,
        type TEXT NOT NULL CHECK(type IN ('key_fact', 'key_snippet', 'research_note', 'human_input', 'work_log', 'decision')),
        content TEXT NOT NULL,
        metadata TEXT, -- JSON: {filepath, line_number, description, source, tags}
        content_hash TEXT NOT NULL,
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        updated_at TEXT NOT NULL DEFAULT (datetime('now'))
      );
      CREATE INDEX IF NOT EXISTS idx_memory_session ON ${MEMORY_TABLE}(session_id);
      CREATE INDEX IF NOT EXISTS idx_memory_type ON ${MEMORY_TABLE}(type);
      CREATE INDEX IF NOT EXISTS idx_memory_unit ON ${MEMORY_TABLE}(unit_id);
      CREATE INDEX IF NOT EXISTS idx_memory_hash ON ${MEMORY_TABLE}(content_hash);
    `;

  try {
    db.exec(ddl);
    return true;
  } catch (error) {
    logWarning("memory", "Failed to ensure memory table", {
      error: String(error),
    });
    return false;
  }
}
|
||||
|
||||
// Stable 16-hex-char (64-bit) content key: SHA-256 truncated for compact
// storage — ample collision resistance for per-session deduplication.
function computeHash(content) {
  const fullDigest = createHash("sha256").update(content).digest("hex");
  return fullDigest.slice(0, 16);
}
|
||||
|
||||
/**
 * Store a memory entry.
 *
 * Deduplicates by content hash within the session: if identical content
 * was already stored for this session, the existing row id is returned
 * and no new row is inserted.
 *
 * @param {object} params
 * @param {string} params.sessionId — required
 * @param {string} [params.unitId] — optional unit tag
 * @param {string} params.type — MEMORY_TYPES value
 * @param {string} params.content — memory content
 * @param {object} [params.metadata] — optional structured metadata (JSON-serialized)
 * @param {object} [params.db] — database connection; defaults to the shared SF db
 * @returns {{id: number, created: boolean, deduped: boolean}|null} null on
 *   validation failure, missing db, or insert error (logged, never thrown)
 */
export function storeMemory({
  sessionId,
  unitId,
  type,
  content,
  metadata,
  db,
}) {
  if (!sessionId || !type || !content) {
    logWarning("memory", "storeMemory missing required fields", {
      sessionId,
      type,
    });
    return null;
  }
  if (!Object.values(MEMORY_TYPES).includes(type)) {
    logWarning("memory", "Invalid memory type", { type });
    return null;
  }

  const hash = computeHash(content);
  // Fix: this module is ESM — `require` is not defined here, so the old
  // require("./sf-db.js").getDb() threw a ReferenceError whenever no db
  // was passed in. Use the statically imported getDb() accessor instead.
  const dbConn = db || (isDbAvailable() ? getDb() : null);
  if (!dbConn) return null;

  ensureMemoryTable(dbConn);

  try {
    // Deduplication: check for an existing identical content hash in the
    // same session before inserting.
    const existing = dbConn
      .prepare(
        `SELECT id FROM ${MEMORY_TABLE} WHERE session_id = ? AND content_hash = ? LIMIT 1`,
      )
      .get(sessionId, hash);

    if (existing) {
      debugLog("memory-dedup", { sessionId, type, hash: hash.slice(0, 8) });
      return { id: existing.id, created: false, deduped: true };
    }

    const result = dbConn
      .prepare(
        `INSERT INTO ${MEMORY_TABLE} (session_id, unit_id, type, content, metadata, content_hash)
         VALUES (?, ?, ?, ?, ?, ?)`,
      )
      .run(
        sessionId,
        unitId || null,
        type,
        content,
        metadata ? JSON.stringify(metadata) : null,
        hash,
      );

    // Auto-summarize check (best-effort; never throws)
    maybeAutoSummarize(dbConn, sessionId);

    return {
      id: Number(result.lastInsertRowid),
      created: true,
      deduped: false,
    };
  } catch (err) {
    logWarning("memory", "storeMemory failed", {
      error: String(err),
      sessionId,
      type,
    });
    return null;
  }
}
|
||||
|
||||
/**
 * Retrieve memories with optional filtering.
 *
 * Results are ordered newest-first by created_at.
 *
 * @param {object} params
 * @param {string} params.sessionId — required; returns [] when absent
 * @param {string} [params.type] — filter by memory type
 * @param {string} [params.unitId] — filter by unit
 * @param {string} [params.query] — substring search in content (LIKE)
 * @param {number} [params.limit=50] — max results
 * @param {object} [params.db] — database connection; defaults to the shared SF db
 * @returns {Array<{id, sessionId, unitId, type, content, metadata, createdAt}>}
 *   empty array on missing session, missing db, or query error (logged)
 */
export function getMemories({
  sessionId,
  type,
  unitId,
  query,
  limit = 50,
  db,
}) {
  // Guard: a session id is required to scope the query; without one the
  // WHERE clause would bind undefined.
  if (!sessionId) return [];
  // Fix: ESM module — `require` is undefined here; use imported getDb().
  const dbConn = db || (isDbAvailable() ? getDb() : null);
  if (!dbConn) return [];

  ensureMemoryTable(dbConn);

  try {
    const conditions = ["session_id = ?"];
    const params = [sessionId];

    if (type) {
      conditions.push("type = ?");
      params.push(type);
    }
    if (unitId) {
      conditions.push("unit_id = ?");
      params.push(unitId);
    }
    if (query) {
      conditions.push("content LIKE ?");
      params.push(`%${query}%`);
    }

    const whereClause = conditions.join(" AND ");
    params.push(limit);

    const rows = dbConn
      .prepare(
        `SELECT id, session_id, unit_id, type, content, metadata, created_at
         FROM ${MEMORY_TABLE}
         WHERE ${whereClause}
         ORDER BY created_at DESC
         LIMIT ?`,
      )
      .all(...params);

    // Map snake_case DB columns to camelCase API fields; metadata is
    // stored as JSON text and parsed back here.
    return rows.map((r) => ({
      id: r.id,
      sessionId: r.session_id,
      unitId: r.unit_id,
      type: r.type,
      content: r.content,
      metadata: r.metadata ? JSON.parse(r.metadata) : null,
      createdAt: r.created_at,
    }));
  } catch (err) {
    logWarning("memory", "getMemories failed", {
      error: String(err),
      sessionId,
    });
    return [];
  }
}
|
||||
|
||||
/**
 * Get memory counts by type for a session.
 *
 * @param {string} sessionId — session to count memories for
 * @param {object} [db] — database connection; defaults to the shared SF db
 * @returns {Object<string, number>} map of memory type → row count;
 *   empty object on missing db or query error (logged, never thrown)
 */
export function getMemoryCounts(sessionId, db) {
  // Fix: ESM module — `require` is undefined here; use imported getDb().
  const dbConn = db || (isDbAvailable() ? getDb() : null);
  if (!dbConn) return {};

  ensureMemoryTable(dbConn);

  try {
    const rows = dbConn
      .prepare(
        `SELECT type, COUNT(*) as count FROM ${MEMORY_TABLE} WHERE session_id = ? GROUP BY type`,
      )
      .all(sessionId);

    const counts = {};
    for (const r of rows) counts[r.type] = r.count;
    return counts;
  } catch (err) {
    logWarning("memory", "getMemoryCounts failed", { error: String(err) });
    return {};
  }
}
|
||||
|
||||
/**
 * Format memories for prompt injection (RA.Aid-style).
 *
 * Emits one "[type] content" line per memory, optionally preceded by a
 * header line, stopping with a "... (N more)" marker once the soft
 * character budget would be exceeded.
 *
 * @param {Array<{type: string, content: string}>} memories
 * @param {object} [options]
 * @param {string|null} [options.type] — keep only this memory type
 * @param {number} [options.maxChars=4000] — soft cap on emitted characters
 * @param {string|null} [options.header] — optional leading line
 * @returns {string} newline-joined prompt section; "" when nothing to emit
 */
export function formatMemoriesForPrompt(memories, options = {}) {
  if (!memories || memories.length === 0) return "";

  const { type = null, maxChars = 4000, header = null } = options;
  const selected = type
    ? memories.filter((entry) => entry.type === type)
    : memories;

  const out = [];
  if (header) out.push(header);

  let used = 0;
  for (const entry of selected) {
    const line = `[${entry.type}] ${entry.content}`;
    if (used + line.length > maxChars) {
      // Count of entries not emitted; header (if any) is excluded from
      // the emitted-line tally.
      const remaining = selected.length - out.length + (header ? 1 : 0);
      out.push(`... (${remaining} more)`);
      break;
    }
    out.push(line);
    used += line.length + 1; // +1 for the joining newline
  }

  return out.join("\n");
}
|
||||
|
||||
/**
 * Auto-summarize when memory count exceeds threshold.
 *
 * Currently logs the condition only; actual compression/summarization
 * is future work. Never throws — summarization is advisory.
 */
function maybeAutoSummarize(db, sessionId) {
  try {
    const row = db
      .prepare(`SELECT COUNT(*) as c FROM ${MEMORY_TABLE} WHERE session_id = ?`)
      .get(sessionId);
    if (row.c < AUTO_SUMMARIZE_THRESHOLD) return;

    debugLog("memory-auto-summarize", {
      sessionId,
      count: row.c,
      threshold: AUTO_SUMMARIZE_THRESHOLD,
    });
    // Future: dispatch a summarization unit or compress oldest memories
  } catch {
    // Non-fatal
  }
}
|
||||
|
||||
/**
 * Delete memories older than a given date.
 *
 * @param {string} sessionId — session whose memories are pruned
 * @param {string} olderThan — cutoff timestamp, compared against created_at
 *   (SQLite datetime text, e.g. "2024-01-01 00:00:00")
 * @param {object} [db] — database connection; defaults to the shared SF db
 * @returns {number} count of deleted rows; 0 on missing db or error (logged)
 */
export function pruneMemories(sessionId, olderThan, db) {
  // Fix: ESM module — `require` is undefined here; use imported getDb().
  const dbConn = db || (isDbAvailable() ? getDb() : null);
  if (!dbConn) return 0;

  ensureMemoryTable(dbConn);

  try {
    const result = dbConn
      .prepare(
        `DELETE FROM ${MEMORY_TABLE} WHERE session_id = ? AND created_at < ?`,
      )
      .run(sessionId, olderThan);
    return result.changes;
  } catch (err) {
    logWarning("memory", "pruneMemories failed", { error: String(err) });
    return 0;
  }
}
|
||||
|
|
@ -93,7 +93,11 @@ class Gauge {
|
|||
}
|
||||
|
||||
class Histogram {
|
||||
constructor(name, help, buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]) {
|
||||
constructor(
|
||||
name,
|
||||
help,
|
||||
buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
|
||||
) {
|
||||
this.name = name;
|
||||
this.help = help;
|
||||
this.buckets = [...buckets].sort((a, b) => a - b);
|
||||
|
|
@ -116,7 +120,9 @@ class Histogram {
|
|||
yield `# HELP ${this.name} ${this.help}`;
|
||||
yield `# TYPE ${this.name} histogram`;
|
||||
for (const bucket of this.buckets) {
|
||||
yield fmtLine(`${this.name}_bucket`, this.counts.get(bucket) ?? 0, { le: String(bucket) });
|
||||
yield fmtLine(`${this.name}_bucket`, this.counts.get(bucket) ?? 0, {
|
||||
le: String(bucket),
|
||||
});
|
||||
}
|
||||
yield fmtLine(`${this.name}_bucket`, this.count, { le: "+Inf" });
|
||||
yield fmtLine(`${this.name}_sum`, this.sum);
|
||||
|
|
@ -127,7 +133,10 @@ class Histogram {
|
|||
// ─── Label Escaping ─────────────────────────────────────────────────────────
|
||||
|
||||
function _escapeLabel(v) {
|
||||
return String(v).replace(/\\/g, "\\\\").replace(/=/g, "\\=").replace(/,/g, "\\,");
|
||||
return String(v)
|
||||
.replace(/\\/g, "\\\\")
|
||||
.replace(/=/g, "\\=")
|
||||
.replace(/,/g, "\\,");
|
||||
}
|
||||
|
||||
function _unescapeLabel(v) {
|
||||
|
|
@ -146,7 +155,7 @@ function _parseLabelKey(key) {
|
|||
let i = 0;
|
||||
while (i < key.length) {
|
||||
// Find the '=' separator for this label
|
||||
let eqIdx = key.indexOf("=", i);
|
||||
const eqIdx = key.indexOf("=", i);
|
||||
if (eqIdx === -1) break;
|
||||
const k = key.slice(i, eqIdx);
|
||||
// Parse the value, handling escapes
|
||||
|
|
@ -188,12 +197,14 @@ function fmtLine(name, value, labels = {}) {
|
|||
|
||||
function validateMetricName(name) {
|
||||
if (!name || typeof name !== "string") {
|
||||
throw new TypeError(`Metric name must be a non-empty string, got: ${typeof name}`);
|
||||
throw new TypeError(
|
||||
`Metric name must be a non-empty string, got: ${typeof name}`,
|
||||
);
|
||||
}
|
||||
if (!METRIC_NAME_PATTERN.test(name)) {
|
||||
throw new Error(
|
||||
`Invalid metric name "${name}". Must match Prometheus naming convention: ` +
|
||||
`^[a-zA-Z_:][a-zA-Z0-9_:]*$`
|
||||
`^[a-zA-Z_:][a-zA-Z0-9_:]*$`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -283,8 +294,12 @@ function ensureMetricsTable(db) {
|
|||
)
|
||||
`);
|
||||
db.exec(`CREATE INDEX IF NOT EXISTS idx_metrics_name ON metrics(name)`);
|
||||
db.exec(`CREATE INDEX IF NOT EXISTS idx_metrics_session ON metrics(session_id)`);
|
||||
db.exec(`CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp)`);
|
||||
db.exec(
|
||||
`CREATE INDEX IF NOT EXISTS idx_metrics_session ON metrics(session_id)`,
|
||||
);
|
||||
db.exec(
|
||||
`CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics(timestamp)`,
|
||||
);
|
||||
} catch (err) {
|
||||
logWarning("metrics-central", `DB table creation failed: ${err.message}`);
|
||||
}
|
||||
|
|
@ -296,22 +311,43 @@ function persistMetricsToDb(registry, sessionId, db) {
|
|||
const ts = new Date().toISOString();
|
||||
try {
|
||||
const insert = db.prepare(
|
||||
"INSERT INTO metrics (name, type, labels, value, timestamp, session_id) VALUES (?, ?, ?, ?, ?, ?)"
|
||||
"INSERT INTO metrics (name, type, labels, value, timestamp, session_id) VALUES (?, ?, ?, ?, ?, ?)",
|
||||
);
|
||||
for (const c of registry.counters.values()) {
|
||||
for (const [key, value] of c.values) {
|
||||
const labels = c._parseKey(key);
|
||||
insert.run(c.name, "counter", JSON.stringify(labels), value, ts, sessionId);
|
||||
insert.run(
|
||||
c.name,
|
||||
"counter",
|
||||
JSON.stringify(labels),
|
||||
value,
|
||||
ts,
|
||||
sessionId,
|
||||
);
|
||||
}
|
||||
}
|
||||
for (const g of registry.gauges.values()) {
|
||||
for (const [key, value] of g.values) {
|
||||
const labels = g._parseKey(key);
|
||||
insert.run(g.name, "gauge", JSON.stringify(labels), value, ts, sessionId);
|
||||
insert.run(
|
||||
g.name,
|
||||
"gauge",
|
||||
JSON.stringify(labels),
|
||||
value,
|
||||
ts,
|
||||
sessionId,
|
||||
);
|
||||
}
|
||||
}
|
||||
for (const h of registry.histograms.values()) {
|
||||
insert.run(h.name, "histogram", JSON.stringify({ count: h.count, sum: h.sum }), h.sum, ts, sessionId);
|
||||
insert.run(
|
||||
h.name,
|
||||
"histogram",
|
||||
JSON.stringify({ count: h.count, sum: h.sum }),
|
||||
h.sum,
|
||||
ts,
|
||||
sessionId,
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
logWarning("metrics-central", `DB persist failed: ${err.message}`);
|
||||
|
|
@ -334,14 +370,23 @@ function flushMetrics() {
|
|||
_flushFailures = 0;
|
||||
} catch (err) {
|
||||
_flushFailures++;
|
||||
logWarning("metrics-central", `Flush failed (attempt ${_flushFailures}): ${err.message}`);
|
||||
logWarning(
|
||||
"metrics-central",
|
||||
`Flush failed (attempt ${_flushFailures}): ${err.message}`,
|
||||
);
|
||||
if (_flushFailures < FLUSH_RETRY_MAX) {
|
||||
const delay = FLUSH_RETRY_BASE_MS * Math.pow(2, _flushFailures - 1);
|
||||
const delay = FLUSH_RETRY_BASE_MS * 2 ** (_flushFailures - 1);
|
||||
setTimeout(flushMetrics, delay);
|
||||
} else {
|
||||
// Record flush failure as a metric
|
||||
try {
|
||||
getRegistry().counter("sf_metrics_flush_failed_total", "Total metrics flush failures", []).inc({}, 1);
|
||||
getRegistry()
|
||||
.counter(
|
||||
"sf_metrics_flush_failed_total",
|
||||
"Total metrics flush failures",
|
||||
[],
|
||||
)
|
||||
.inc({}, 1);
|
||||
} catch {
|
||||
// Best effort
|
||||
}
|
||||
|
|
@ -404,7 +449,9 @@ export function recordCounter(name, labels = {}, amount = 1) {
|
|||
if (_sessionId && !labels.session_id) {
|
||||
labels = { ...labels, session_id: _sessionId };
|
||||
}
|
||||
getRegistry().counter(name, meta.help, Object.keys(labels)).inc(labels, amount);
|
||||
getRegistry()
|
||||
.counter(name, meta.help, Object.keys(labels))
|
||||
.inc(labels, amount);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -445,7 +492,14 @@ export function recordHistogram(name, value) {
|
|||
* @param {number} cost — cost in USD
|
||||
* @param {string} [workMode] — current work mode
|
||||
*/
|
||||
export function recordCost(unitId, modelId, inputTokens, outputTokens, cost, workMode = "") {
|
||||
export function recordCost(
|
||||
unitId,
|
||||
modelId,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
cost,
|
||||
workMode = "",
|
||||
) {
|
||||
const labels = { unit_id: unitId, model_id: modelId };
|
||||
if (workMode) labels.work_mode = workMode;
|
||||
recordCounter("sf_cost_total", labels, cost);
|
||||
|
|
@ -510,114 +564,114 @@ export function queryMetrics(db, sessionId = null, name = null, limit = 1000) {
|
|||
|
||||
const METRIC_META = {
|
||||
// Subagent inheritance
|
||||
"sf_subagent_dispatch_total": {
|
||||
sf_subagent_dispatch_total: {
|
||||
help: "Total subagent dispatch attempts",
|
||||
labels: ["work_mode", "permission_profile"],
|
||||
},
|
||||
"sf_subagent_dispatch_blocked": {
|
||||
sf_subagent_dispatch_blocked: {
|
||||
help: "Subagent dispatches blocked by inheritance policy",
|
||||
labels: ["reason", "work_mode", "permission_profile"],
|
||||
},
|
||||
"sf_subagent_dispatch_allowed": {
|
||||
sf_subagent_dispatch_allowed: {
|
||||
help: "Subagent dispatches allowed after inheritance check",
|
||||
labels: ["work_mode", "permission_profile"],
|
||||
},
|
||||
|
||||
// Mode transitions
|
||||
"sf_mode_transition_total": {
|
||||
sf_mode_transition_total: {
|
||||
help: "Total mode transitions",
|
||||
labels: ["axis", "from", "to", "reason"],
|
||||
},
|
||||
|
||||
// Task frontmatter
|
||||
"sf_task_created_total": {
|
||||
sf_task_created_total: {
|
||||
help: "Total tasks created with frontmatter",
|
||||
labels: ["risk_level", "mutation_scope"],
|
||||
},
|
||||
"sf_task_parallel_blocked": {
|
||||
sf_task_parallel_blocked: {
|
||||
help: "Tasks blocked from parallel execution by frontmatter",
|
||||
labels: ["reason"],
|
||||
},
|
||||
|
||||
// Parallel intent
|
||||
"sf_parallel_intent_declared": {
|
||||
sf_parallel_intent_declared: {
|
||||
help: "Parallel worker intents declared",
|
||||
labels: ["milestone_id"],
|
||||
},
|
||||
"sf_parallel_intent_conflict": {
|
||||
sf_parallel_intent_conflict: {
|
||||
help: "Parallel intent conflicts detected",
|
||||
labels: ["milestone_id"],
|
||||
},
|
||||
|
||||
// Remote steering
|
||||
"sf_remote_steering_applied": {
|
||||
sf_remote_steering_applied: {
|
||||
help: "Remote steering directives applied",
|
||||
labels: ["directive_type", "source"],
|
||||
},
|
||||
"sf_remote_steering_rejected": {
|
||||
sf_remote_steering_rejected: {
|
||||
help: "Remote steering directives rejected (throttle/invalid)",
|
||||
labels: ["reason"],
|
||||
},
|
||||
|
||||
// Skill eval
|
||||
"sf_skill_eval_runs_total": {
|
||||
sf_skill_eval_runs_total: {
|
||||
help: "Total skill evaluation runs",
|
||||
labels: ["skill_name", "passed"],
|
||||
},
|
||||
"sf_skill_eval_duration_ms": {
|
||||
sf_skill_eval_duration_ms: {
|
||||
help: "Skill evaluation duration in milliseconds",
|
||||
buckets: [100, 500, 1000, 5000, 10000, 30000],
|
||||
},
|
||||
|
||||
// Cost guard
|
||||
"sf_cost_guard_blocked": {
|
||||
sf_cost_guard_blocked: {
|
||||
help: "Units blocked by cost guard",
|
||||
labels: ["reason", "model_id"],
|
||||
},
|
||||
"sf_cost_guard_hourly_spend": {
|
||||
sf_cost_guard_hourly_spend: {
|
||||
help: "Current hourly spend in USD",
|
||||
},
|
||||
|
||||
// Gate runner
|
||||
"sf_gate_runs_total": {
|
||||
sf_gate_runs_total: {
|
||||
help: "Total gate executions",
|
||||
labels: ["gate_id", "outcome"],
|
||||
},
|
||||
"sf_gate_latency_ms": {
|
||||
sf_gate_latency_ms: {
|
||||
help: "Gate execution latency in milliseconds",
|
||||
buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000],
|
||||
},
|
||||
|
||||
// Message bus
|
||||
"sf_message_bus_messages_total": {
|
||||
sf_message_bus_messages_total: {
|
||||
help: "Total messages in bus",
|
||||
labels: ["agent_id"],
|
||||
},
|
||||
"sf_message_bus_unread_total": {
|
||||
sf_message_bus_unread_total: {
|
||||
help: "Unread messages in bus",
|
||||
labels: ["agent_id"],
|
||||
},
|
||||
|
||||
// Cost tracking
|
||||
"sf_cost_total": {
|
||||
sf_cost_total: {
|
||||
help: "Total cost in USD",
|
||||
labels: ["unit_id", "model_id", "work_mode"],
|
||||
},
|
||||
"sf_tokens_input_total": {
|
||||
sf_tokens_input_total: {
|
||||
help: "Total input tokens",
|
||||
labels: ["model_id"],
|
||||
},
|
||||
"sf_tokens_output_total": {
|
||||
sf_tokens_output_total: {
|
||||
help: "Total output tokens",
|
||||
labels: ["model_id"],
|
||||
},
|
||||
"sf_cost_last": {
|
||||
sf_cost_last: {
|
||||
help: "Last recorded cost in USD",
|
||||
labels: ["unit_id", "model_id"],
|
||||
},
|
||||
|
||||
// Internal
|
||||
"sf_metrics_flush_failed_total": {
|
||||
sf_metrics_flush_failed_total: {
|
||||
help: "Total metrics flush failures",
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -258,7 +258,14 @@ export function snapshotUnitMetrics(
|
|||
// Fire-and-forget: don't block the snapshot on metrics-central
|
||||
import("./metrics-central.js")
|
||||
.then(({ recordCost }) => {
|
||||
recordCost(unitId, model, tokens.input, tokens.output, cost, classifyUnitPhase(unitType));
|
||||
recordCost(
|
||||
unitId,
|
||||
model,
|
||||
tokens.input,
|
||||
tokens.output,
|
||||
cost,
|
||||
classifyUnitPhase(unitType),
|
||||
);
|
||||
})
|
||||
.catch(() => {
|
||||
// metrics-central is optional; never block snapshot
|
||||
|
|
|
|||
|
|
@ -16,7 +16,16 @@
|
|||
|
||||
import { getAutoSession } from "./auto/session.js";
|
||||
import { loadFile } from "./files.js";
|
||||
import { resolveMilestoneFile, resolveSliceFile, resolveSfRootFile } from "./paths.js";
|
||||
import {
|
||||
formatMemoriesForPrompt,
|
||||
getMemories,
|
||||
MEMORY_TYPES,
|
||||
} from "./memory-repository.js";
|
||||
import {
|
||||
resolveMilestoneFile,
|
||||
resolveSfRootFile,
|
||||
resolveSliceFile,
|
||||
} from "./paths.js";
|
||||
import { logWarning } from "./workflow-logger.js";
|
||||
|
||||
const REASONING_ASSIST_ENABLED = process.env.SF_REASONING_ASSIST === "1";
|
||||
|
|
@ -31,12 +40,21 @@ const REASONING_ASSIST_MAX_CHARS = 2000;
|
|||
* @param {object} ctx — dispatch context
|
||||
* @returns {string|null} — reasoning prompt or null if disabled
|
||||
*/
|
||||
export async function buildReasoningAssistPrompt(unitType, unitId, basePath, ctx) {
|
||||
export async function buildReasoningAssistPrompt(
|
||||
unitType,
|
||||
unitId,
|
||||
basePath,
|
||||
ctx,
|
||||
) {
|
||||
if (!REASONING_ASSIST_ENABLED) return null;
|
||||
|
||||
const parts = [];
|
||||
parts.push(`You are a senior engineering advisor. The team is about to run a "${unitType}" unit (${unitId}).`);
|
||||
parts.push("Review the available context and write 3-5 sentences of strategic guidance:");
|
||||
parts.push(
|
||||
`You are a senior engineering advisor. The team is about to run a "${unitType}" unit (${unitId}).`,
|
||||
);
|
||||
parts.push(
|
||||
"Review the available context and write 3-5 sentences of strategic guidance:",
|
||||
);
|
||||
parts.push("- What should the agent focus on?");
|
||||
parts.push("- What common mistakes should it avoid?");
|
||||
parts.push("- What tools should it use and in what order?");
|
||||
|
|
@ -45,7 +63,12 @@ export async function buildReasoningAssistPrompt(unitType, unitId, basePath, ctx
|
|||
parts.push("");
|
||||
|
||||
// Load relevant context files
|
||||
const contextFiles = await loadRelevantContext(unitType, unitId, basePath);
|
||||
const contextFiles = await loadRelevantContext(
|
||||
unitType,
|
||||
unitId,
|
||||
basePath,
|
||||
ctx,
|
||||
);
|
||||
for (const { label, content } of contextFiles) {
|
||||
if (content) {
|
||||
parts.push(`--- ${label} ---`);
|
||||
|
|
@ -57,7 +80,7 @@ export async function buildReasoningAssistPrompt(unitType, unitId, basePath, ctx
|
|||
return parts.join("\n");
|
||||
}
|
||||
|
||||
async function loadRelevantContext(unitType, unitId, basePath) {
|
||||
async function loadRelevantContext(unitType, unitId, basePath, ctx) {
|
||||
const results = [];
|
||||
|
||||
// Parse unit ID
|
||||
|
|
@ -65,6 +88,64 @@ async function loadRelevantContext(unitType, unitId, basePath) {
|
|||
const milestoneId = segments[0];
|
||||
const sliceId = segments[1];
|
||||
|
||||
// Load structured memory (RA.Aid-style key facts/snippets/notes)
|
||||
const sessionId = ctx?.sessionManager?.getSessionId?.();
|
||||
if (sessionId) {
|
||||
const { getDatabase } = await import("./sf-db.js");
|
||||
const db = getDatabase();
|
||||
|
||||
// Key facts
|
||||
const keyFacts = getMemories({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
limit: 20,
|
||||
db,
|
||||
});
|
||||
if (keyFacts.length > 0) {
|
||||
const formatted = formatMemoriesForPrompt(keyFacts, {
|
||||
header: "Key Facts:",
|
||||
maxChars: 1500,
|
||||
});
|
||||
if (formatted) results.push({ label: "Key Facts", content: formatted });
|
||||
}
|
||||
|
||||
// Key snippets for implementation
|
||||
if (unitType.includes("execute") || unitType.includes("implement")) {
|
||||
const snippets = getMemories({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_SNIPPET,
|
||||
limit: 10,
|
||||
db,
|
||||
});
|
||||
if (snippets.length > 0) {
|
||||
const formatted = formatMemoriesForPrompt(snippets, {
|
||||
header: "Key Snippets:",
|
||||
maxChars: 2000,
|
||||
});
|
||||
if (formatted)
|
||||
results.push({ label: "Key Snippets", content: formatted });
|
||||
}
|
||||
}
|
||||
|
||||
// Research notes for research/planning
|
||||
if (unitType.includes("research") || unitType.includes("plan")) {
|
||||
const notes = getMemories({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.RESEARCH_NOTE,
|
||||
limit: 10,
|
||||
db,
|
||||
});
|
||||
if (notes.length > 0) {
|
||||
const formatted = formatMemoriesForPrompt(notes, {
|
||||
header: "Research Notes:",
|
||||
maxChars: 2000,
|
||||
});
|
||||
if (formatted)
|
||||
results.push({ label: "Research Notes", content: formatted });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load decisions
|
||||
const decisionsPath = resolveSfRootFile(basePath, "DECISIONS");
|
||||
if (decisionsPath) {
|
||||
|
|
@ -84,16 +165,23 @@ async function loadRelevantContext(unitType, unitId, basePath) {
|
|||
const contextPath = resolveMilestoneFile(basePath, milestoneId, "CONTEXT");
|
||||
if (contextPath) {
|
||||
const content = await loadFile(contextPath);
|
||||
if (content) results.push({ label: `Milestone ${milestoneId} Context`, content });
|
||||
if (content)
|
||||
results.push({ label: `Milestone ${milestoneId} Context`, content });
|
||||
}
|
||||
}
|
||||
|
||||
// Load slice research for planning/execution
|
||||
if (sliceId && (unitType.includes("plan") || unitType.includes("execute"))) {
|
||||
const researchPath = resolveSliceFile(basePath, milestoneId, sliceId, "RESEARCH");
|
||||
const researchPath = resolveSliceFile(
|
||||
basePath,
|
||||
milestoneId,
|
||||
sliceId,
|
||||
"RESEARCH",
|
||||
);
|
||||
if (researchPath) {
|
||||
const content = await loadFile(researchPath);
|
||||
if (content) results.push({ label: `Slice ${sliceId} Research`, content });
|
||||
if (content)
|
||||
results.push({ label: `Slice ${sliceId} Research`, content });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -87,12 +87,19 @@ function createAdapter(rawDb) {
|
|||
* Execute a database query with timeout protection.
|
||||
* Falls back to empty result if query exceeds timeout.
|
||||
*/
|
||||
function withQueryTimeout(operation, fallbackValue, timeoutMs = DB_QUERY_TIMEOUT_MS) {
|
||||
function withQueryTimeout(
|
||||
operation,
|
||||
fallbackValue,
|
||||
timeoutMs = DB_QUERY_TIMEOUT_MS,
|
||||
) {
|
||||
try {
|
||||
return operation();
|
||||
} catch (err) {
|
||||
if (err?.message?.includes("timeout") || err?.message?.includes("busy")) {
|
||||
logWarning("sf-db", `Query timed out after ${timeoutMs}ms, returning fallback`);
|
||||
logWarning(
|
||||
"sf-db",
|
||||
`Query timed out after ${timeoutMs}ms, returning fallback`,
|
||||
);
|
||||
return fallbackValue;
|
||||
}
|
||||
throw err;
|
||||
|
|
|
|||
|
|
@ -67,7 +67,8 @@ export async function runGrader(evalDir, _ctx) {
|
|||
grade(workDir),
|
||||
new Promise((_, reject) =>
|
||||
setTimeout(
|
||||
() => reject(new Error(`Grader timed out after ${GRADER_TIMEOUT_MS}ms`)),
|
||||
() =>
|
||||
reject(new Error(`Grader timed out after ${GRADER_TIMEOUT_MS}ms`)),
|
||||
GRADER_TIMEOUT_MS,
|
||||
),
|
||||
),
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
*/
|
||||
|
||||
import { getAutoSession } from "./auto/session.js";
|
||||
import { recordCounter } from "./metrics-central.js";
|
||||
import {
|
||||
resolveModelMode,
|
||||
resolvePermissionProfile,
|
||||
|
|
@ -15,7 +16,6 @@ import {
|
|||
resolveWorkMode,
|
||||
} from "./operating-model.js";
|
||||
import { isProviderAllowedByLists } from "./preferences-models.js";
|
||||
import { recordCounter } from "./metrics-central.js";
|
||||
import { logWarning } from "./workflow-logger.js";
|
||||
|
||||
function providerFromModelId(modelId) {
|
||||
|
|
@ -98,7 +98,10 @@ export function validateSubagentDispatch(envelope, proposal) {
|
|||
envelope.blockedProviders,
|
||||
)
|
||||
) {
|
||||
logWarning("subagent-inheritance", `Blocked provider "${provider}" for subagent dispatch`);
|
||||
logWarning(
|
||||
"subagent-inheritance",
|
||||
`Blocked provider "${provider}" for subagent dispatch`,
|
||||
);
|
||||
recordCounter("sf_subagent_dispatch_blocked", {
|
||||
reason: "provider",
|
||||
work_mode: envelope.workMode,
|
||||
|
|
@ -111,7 +114,10 @@ export function validateSubagentDispatch(envelope, proposal) {
|
|||
}
|
||||
|
||||
if (envelope.modelMode === "fast" && isHeavyModelId(modelId)) {
|
||||
logWarning("subagent-inheritance", `Blocked heavy model "${modelId}" in fast mode`);
|
||||
logWarning(
|
||||
"subagent-inheritance",
|
||||
`Blocked heavy model "${modelId}" in fast mode`,
|
||||
);
|
||||
recordCounter("sf_subagent_dispatch_blocked", {
|
||||
reason: "model_mode",
|
||||
work_mode: envelope.workMode,
|
||||
|
|
@ -125,12 +131,20 @@ export function validateSubagentDispatch(envelope, proposal) {
|
|||
|
||||
if (envelope.permissionProfile === "restricted") {
|
||||
const proposedTools = proposal.tools ?? [];
|
||||
const RESTRICTED_TOOLS = new Set(["write", "edit", "bash", "mac_launch_app"]);
|
||||
const RESTRICTED_TOOLS = new Set([
|
||||
"write",
|
||||
"edit",
|
||||
"bash",
|
||||
"mac_launch_app",
|
||||
]);
|
||||
const blocked = proposedTools.filter((toolName) =>
|
||||
RESTRICTED_TOOLS.has(toolName.toLowerCase()),
|
||||
);
|
||||
if (blocked.length > 0) {
|
||||
logWarning("subagent-inheritance", `Blocked tools [${blocked.join(", ")}] in restricted mode`);
|
||||
logWarning(
|
||||
"subagent-inheritance",
|
||||
`Blocked tools [${blocked.join(", ")}] in restricted mode`,
|
||||
);
|
||||
recordCounter("sf_subagent_dispatch_blocked", {
|
||||
reason: "permission_profile",
|
||||
work_mode: envelope.workMode,
|
||||
|
|
|
|||
209
src/resources/extensions/sf/tests/memory-repository.test.mjs
Normal file
209
src/resources/extensions/sf/tests/memory-repository.test.mjs
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
/**
|
||||
* Memory Repository Tests
|
||||
*
|
||||
* Tests for structured memory storage (key facts, snippets, notes).
|
||||
*/
|
||||
|
||||
import { beforeEach, describe, expect, test } from "vitest";
|
||||
import {
|
||||
ensureMemoryTable,
|
||||
formatMemoriesForPrompt,
|
||||
getMemories,
|
||||
getMemoryCounts,
|
||||
MEMORY_TYPES,
|
||||
pruneMemories,
|
||||
storeMemory,
|
||||
} from "../memory-repository.js";
|
||||
import { closeDatabase, getDatabase, openDatabase } from "../sf-db.js";
|
||||
|
||||
describe("Memory Repository", () => {
|
||||
let db;
|
||||
let sessionId;
|
||||
|
||||
beforeEach(() => {
|
||||
closeDatabase();
|
||||
openDatabase(":memory:");
|
||||
db = getDatabase();
|
||||
sessionId = `test-session-${Date.now()}`;
|
||||
ensureMemoryTable(db);
|
||||
});
|
||||
|
||||
test("storeMemory_creates_key_fact", () => {
|
||||
const result = storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "SF uses SQLite for canonical state",
|
||||
db,
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result.created).toBe(true);
|
||||
expect(result.deduped).toBe(false);
|
||||
expect(result.id).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("storeMemory_dedupes_same_content", () => {
|
||||
const r1 = storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "Same content",
|
||||
db,
|
||||
});
|
||||
const r2 = storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "Same content",
|
||||
db,
|
||||
});
|
||||
expect(r1.id).toBe(r2.id);
|
||||
expect(r2.deduped).toBe(true);
|
||||
});
|
||||
|
||||
test("storeMemory_with_metadata", () => {
|
||||
const result = storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_SNIPPET,
|
||||
content: "const x = 1;",
|
||||
metadata: {
|
||||
filepath: "src/foo.js",
|
||||
line_number: 42,
|
||||
description: "Init",
|
||||
},
|
||||
db,
|
||||
});
|
||||
expect(result.created).toBe(true);
|
||||
const memories = getMemories({ sessionId, db });
|
||||
expect(memories[0].metadata).toEqual({
|
||||
filepath: "src/foo.js",
|
||||
line_number: 42,
|
||||
description: "Init",
|
||||
});
|
||||
});
|
||||
|
||||
test("getMemories_filters_by_type", () => {
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "Fact 1",
|
||||
db,
|
||||
});
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.RESEARCH_NOTE,
|
||||
content: "Note 1",
|
||||
db,
|
||||
});
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "Fact 2",
|
||||
db,
|
||||
});
|
||||
|
||||
const facts = getMemories({ sessionId, type: MEMORY_TYPES.KEY_FACT, db });
|
||||
expect(facts.length).toBe(2);
|
||||
expect(facts.every((m) => m.type === "key_fact")).toBe(true);
|
||||
});
|
||||
|
||||
test("getMemories_searches_by_query", () => {
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "SQLite is the canonical store",
|
||||
db,
|
||||
});
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "Node 26 is the target runtime",
|
||||
db,
|
||||
});
|
||||
|
||||
const results = getMemories({ sessionId, query: "SQLite", db });
|
||||
expect(results.length).toBe(1);
|
||||
expect(results[0].content).toContain("SQLite");
|
||||
});
|
||||
|
||||
test("getMemoryCounts_returns_counts_by_type", () => {
|
||||
storeMemory({ sessionId, type: MEMORY_TYPES.KEY_FACT, content: "F1", db });
|
||||
storeMemory({ sessionId, type: MEMORY_TYPES.KEY_FACT, content: "F2", db });
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.RESEARCH_NOTE,
|
||||
content: "N1",
|
||||
db,
|
||||
});
|
||||
|
||||
const counts = getMemoryCounts(sessionId, db);
|
||||
expect(counts.key_fact).toBe(2);
|
||||
expect(counts.research_note).toBe(1);
|
||||
});
|
||||
|
||||
test("formatMemoriesForPrompt_formats_with_header", () => {
|
||||
const memories = [
|
||||
{ type: "key_fact", content: "Fact A" },
|
||||
{ type: "key_fact", content: "Fact B" },
|
||||
];
|
||||
const formatted = formatMemoriesForPrompt(memories, {
|
||||
header: "Key Facts:",
|
||||
});
|
||||
expect(formatted).toContain("Key Facts:");
|
||||
expect(formatted).toContain("[key_fact] Fact A");
|
||||
expect(formatted).toContain("[key_fact] Fact B");
|
||||
});
|
||||
|
||||
test("formatMemoriesForPrompt_respects_maxChars", () => {
|
||||
const memories = Array.from({ length: 10 }, (_, i) => ({
|
||||
type: "key_fact",
|
||||
content: `This is a very long fact number ${i} with lots of content to test truncation`,
|
||||
}));
|
||||
const formatted = formatMemoriesForPrompt(memories, { maxChars: 100 });
|
||||
expect(formatted.length).toBeLessThanOrEqual(150); // header + some content + "..."
|
||||
expect(formatted).toContain("...");
|
||||
});
|
||||
|
||||
test("pruneMemories_deletes_old_entries", () => {
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "Old fact",
|
||||
db,
|
||||
});
|
||||
// Manually set created_at to past
|
||||
db.prepare(
|
||||
`UPDATE sf_memory SET created_at = '2020-01-01' WHERE session_id = ?`,
|
||||
).run(sessionId);
|
||||
|
||||
storeMemory({
|
||||
sessionId,
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "New fact",
|
||||
db,
|
||||
});
|
||||
|
||||
const deleted = pruneMemories(sessionId, "2025-01-01", db);
|
||||
expect(deleted).toBe(1);
|
||||
|
||||
const remaining = getMemories({ sessionId, db });
|
||||
expect(remaining.length).toBe(1);
|
||||
expect(remaining[0].content).toBe("New fact");
|
||||
});
|
||||
|
||||
test("storeMemory_rejects_invalid_type", () => {
|
||||
const result = storeMemory({
|
||||
sessionId,
|
||||
type: "invalid_type",
|
||||
content: "Should fail",
|
||||
db,
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("storeMemory_requires_sessionId", () => {
|
||||
const result = storeMemory({
|
||||
type: MEMORY_TYPES.KEY_FACT,
|
||||
content: "Should fail",
|
||||
db,
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
@ -1,96 +1,122 @@
|
|||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
initMetricsCentral,
|
||||
stopMetricsCentral,
|
||||
recordCounter,
|
||||
recordGauge,
|
||||
recordHistogram,
|
||||
getMetricsText,
|
||||
registerMetricMeta,
|
||||
recordCost,
|
||||
queryMetrics,
|
||||
getMetricsText,
|
||||
initMetricsCentral,
|
||||
queryMetrics,
|
||||
recordCost,
|
||||
recordCounter,
|
||||
recordGauge,
|
||||
recordHistogram,
|
||||
registerMetricMeta,
|
||||
stopMetricsCentral,
|
||||
} from "../metrics-central.js";
|
||||
|
||||
describe("metrics-central", () => {
|
||||
beforeEach(() => {
|
||||
initMetricsCentral("/tmp/test-project");
|
||||
});
|
||||
beforeEach(() => {
|
||||
initMetricsCentral("/tmp/test-project");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
stopMetricsCentral();
|
||||
});
|
||||
afterEach(() => {
|
||||
stopMetricsCentral();
|
||||
});
|
||||
|
||||
it("recordCounter_increments_and_exposes", () => {
|
||||
recordCounter("sf_test_counter", { label: "a" }, 3);
|
||||
recordCounter("sf_test_counter", { label: "a" }, 2);
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_test_counter{label="a"} 5');
|
||||
expect(text).toContain("# TYPE sf_test_counter counter");
|
||||
});
|
||||
it("recordCounter_increments_and_exposes", () => {
|
||||
recordCounter("sf_test_counter", { label: "a" }, 3);
|
||||
recordCounter("sf_test_counter", { label: "a" }, 2);
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_test_counter{label="a"} 5');
|
||||
expect(text).toContain("# TYPE sf_test_counter counter");
|
||||
});
|
||||
|
||||
it("recordGauge_sets_and_exposes", () => {
|
||||
recordGauge("sf_test_gauge", 42, { env: "prod" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_test_gauge{env="prod"} 42');
|
||||
expect(text).toContain("# TYPE sf_test_gauge gauge");
|
||||
});
|
||||
it("recordGauge_sets_and_exposes", () => {
|
||||
recordGauge("sf_test_gauge", 42, { env: "prod" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_test_gauge{env="prod"} 42');
|
||||
expect(text).toContain("# TYPE sf_test_gauge gauge");
|
||||
});
|
||||
|
||||
it("recordHistogram_observes_and_exposes_buckets", () => {
|
||||
registerMetricMeta("sf_test_hist", "Test histogram", [], [1, 5, 10]);
|
||||
recordHistogram("sf_test_hist", 3);
|
||||
recordHistogram("sf_test_hist", 7);
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_test_hist_bucket{le="1"} 0');
|
||||
expect(text).toContain('sf_test_hist_bucket{le="5"} 1');
|
||||
expect(text).toContain('sf_test_hist_bucket{le="10"} 2');
|
||||
expect(text).toContain("sf_test_hist_count 2");
|
||||
expect(text).toContain("sf_test_hist_sum 10");
|
||||
});
|
||||
it("recordHistogram_observes_and_exposes_buckets", () => {
|
||||
registerMetricMeta("sf_test_hist", "Test histogram", [], [1, 5, 10]);
|
||||
recordHistogram("sf_test_hist", 3);
|
||||
recordHistogram("sf_test_hist", 7);
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_test_hist_bucket{le="1"} 0');
|
||||
expect(text).toContain('sf_test_hist_bucket{le="5"} 1');
|
||||
expect(text).toContain('sf_test_hist_bucket{le="10"} 2');
|
||||
expect(text).toContain("sf_test_hist_count 2");
|
||||
expect(text).toContain("sf_test_hist_sum 10");
|
||||
});
|
||||
|
||||
it("subagent_metrics_tracked", () => {
|
||||
recordCounter("sf_subagent_dispatch_total", { work_mode: "build", permission_profile: "trusted" });
|
||||
recordCounter("sf_subagent_dispatch_blocked", { reason: "provider", work_mode: "build", permission_profile: "trusted" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_subagent_dispatch_total{permission_profile="trusted",work_mode="build"} 1');
|
||||
expect(text).toContain('sf_subagent_dispatch_blocked{permission_profile="trusted",reason="provider",work_mode="build"} 1');
|
||||
});
|
||||
it("subagent_metrics_tracked", () => {
|
||||
recordCounter("sf_subagent_dispatch_total", {
|
||||
work_mode: "build",
|
||||
permission_profile: "trusted",
|
||||
});
|
||||
recordCounter("sf_subagent_dispatch_blocked", {
|
||||
reason: "provider",
|
||||
work_mode: "build",
|
||||
permission_profile: "trusted",
|
||||
});
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain(
|
||||
'sf_subagent_dispatch_total{permission_profile="trusted",work_mode="build"} 1',
|
||||
);
|
||||
expect(text).toContain(
|
||||
'sf_subagent_dispatch_blocked{permission_profile="trusted",reason="provider",work_mode="build"} 1',
|
||||
);
|
||||
});
|
||||
|
||||
it("mode_transition_metrics_tracked", () => {
|
||||
recordCounter("sf_mode_transition_total", { axis: "work_mode", from: "chat", to: "build", reason: "user_command" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_mode_transition_total{axis="work_mode",from="chat",reason="user_command",to="build"} 1');
|
||||
});
|
||||
it("mode_transition_metrics_tracked", () => {
|
||||
recordCounter("sf_mode_transition_total", {
|
||||
axis: "work_mode",
|
||||
from: "chat",
|
||||
to: "build",
|
||||
reason: "user_command",
|
||||
});
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain(
|
||||
'sf_mode_transition_total{axis="work_mode",from="chat",reason="user_command",to="build"} 1',
|
||||
);
|
||||
});
|
||||
|
||||
it("session_id_auto_injected", () => {
|
||||
initMetricsCentral("/tmp/test-project", { sessionId: "sess-abc-123" });
|
||||
recordCounter("sf_test_session", { label: "x" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('session_id="sess-abc-123"');
|
||||
});
|
||||
it("session_id_auto_injected", () => {
|
||||
initMetricsCentral("/tmp/test-project", { sessionId: "sess-abc-123" });
|
||||
recordCounter("sf_test_session", { label: "x" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('session_id="sess-abc-123"');
|
||||
});
|
||||
|
||||
it("cost_metrics_tracked", () => {
|
||||
recordCost("unit-42", "claude-sonnet-4", 1500, 800, 0.045, "build");
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('sf_cost_total{model_id="claude-sonnet-4",unit_id="unit-42",work_mode="build"} 0.045');
|
||||
expect(text).toContain('sf_tokens_input_total{model_id="claude-sonnet-4"} 1500');
|
||||
expect(text).toContain('sf_tokens_output_total{model_id="claude-sonnet-4"} 800');
|
||||
expect(text).toContain('sf_cost_last{model_id="claude-sonnet-4",unit_id="unit-42"} 0.045');
|
||||
});
|
||||
it("cost_metrics_tracked", () => {
|
||||
recordCost("unit-42", "claude-sonnet-4", 1500, 800, 0.045, "build");
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain(
|
||||
'sf_cost_total{model_id="claude-sonnet-4",unit_id="unit-42",work_mode="build"} 0.045',
|
||||
);
|
||||
expect(text).toContain(
|
||||
'sf_tokens_input_total{model_id="claude-sonnet-4"} 1500',
|
||||
);
|
||||
expect(text).toContain(
|
||||
'sf_tokens_output_total{model_id="claude-sonnet-4"} 800',
|
||||
);
|
||||
expect(text).toContain(
|
||||
'sf_cost_last{model_id="claude-sonnet-4",unit_id="unit-42"} 0.045',
|
||||
);
|
||||
});
|
||||
|
||||
it("invalid_metric_name_rejected", () => {
|
||||
expect(() => recordCounter("bad name with spaces", {})).toThrow();
|
||||
expect(() => recordCounter("123_starts_with_number", {})).toThrow();
|
||||
expect(() => recordCounter("", {})).toThrow();
|
||||
});
|
||||
it("invalid_metric_name_rejected", () => {
|
||||
expect(() => recordCounter("bad name with spaces", {})).toThrow();
|
||||
expect(() => recordCounter("123_starts_with_number", {})).toThrow();
|
||||
expect(() => recordCounter("", {})).toThrow();
|
||||
});
|
||||
|
||||
it("label_escaping_handles_special_chars", () => {
|
||||
recordCounter("sf_test_escape", { key: "a=b,c" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('key="a=b,c"');
|
||||
});
|
||||
it("label_escaping_handles_special_chars", () => {
|
||||
recordCounter("sf_test_escape", { key: "a=b,c" });
|
||||
const text = getMetricsText();
|
||||
expect(text).toContain('key="a=b,c"');
|
||||
});
|
||||
|
||||
it("queryMetrics_returns_empty_without_db", () => {
|
||||
const results = queryMetrics(null, "sess-1", "sf_test");
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
it("queryMetrics_returns_empty_without_db", () => {
|
||||
const results = queryMetrics(null, "sess-1", "sf_test");
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
210
src/resources/extensions/sf/tests/trajectory-recorder.test.mjs
Normal file
210
src/resources/extensions/sf/tests/trajectory-recorder.test.mjs
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
/**
|
||||
* Trajectory Recorder Tests
|
||||
*/
|
||||
|
||||
import { beforeEach, describe, expect, test } from "vitest";
|
||||
import { closeDatabase, getDatabase, openDatabase } from "../sf-db.js";
|
||||
import {
|
||||
clearTrajectory,
|
||||
ensureTrajectoryTable,
|
||||
formatTrajectory,
|
||||
getTrajectory,
|
||||
getTrajectorySummary,
|
||||
recordTrajectoryStep,
|
||||
STEP_TYPES,
|
||||
} from "../trajectory-recorder.js";
|
||||
|
||||
describe("Trajectory Recorder", () => {
  let db;
  let sessionId;

  beforeEach(() => {
    // Fresh in-memory database per test so rows never leak between cases.
    closeDatabase();
    openDatabase(":memory:");
    db = getDatabase();
    // A fresh session id restarts the recorder's in-memory step counter
    // (keyed by sessionId) at 1 for each test.
    // NOTE(review): Date.now() can collide for tests running in the same
    // millisecond — consider appending a random suffix. TODO confirm.
    sessionId = `test-session-${Date.now()}`;
    ensureTrajectoryTable(db);
  });

  // Inserting a tool_call yields a positive row id and a 1-based step number.
  test("recordTrajectoryStep_creates_tool_call", () => {
    const result = recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: "readFile",
      toolParams: { path: "src/foo.js" },
      db,
    });
    expect(result).not.toBeNull();
    expect(result.id).toBeGreaterThan(0);
    expect(result.stepNumber).toBe(1);
  });

  // Step numbers are sequential within one session.
  test("recordTrajectoryStep_increments_step_number", () => {
    const r1 = recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: "readFile",
      db,
    });
    const r2 = recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_RESULT,
      toolName: "readFile",
      toolResult: "content",
      db,
    });
    expect(r1.stepNumber).toBe(1);
    expect(r2.stepNumber).toBe(2);
  });

  // Error fields (flag, message, type, details) round-trip through the DB.
  test("recordTrajectoryStep_records_error", () => {
    const result = recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.ERROR,
      isError: true,
      errorMessage: "File not found",
      errorType: "FileNotFoundError",
      errorDetails: "Stack trace here",
      db,
    });
    expect(result).not.toBeNull();

    const steps = getTrajectory({ sessionId, db });
    expect(steps[0].isError).toBe(true);
    expect(steps[0].errorMessage).toBe("File not found");
    expect(steps[0].errorType).toBe("FileNotFoundError");
  });

  // Token counts and per-step cost round-trip through the DB.
  test("recordTrajectoryStep_records_cost_and_tokens", () => {
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.LLM_CALL,
      llmModel: "claude-3-opus",
      inputTokens: 1000,
      outputTokens: 500,
      stepCost: 0.015,
      db,
    });

    const steps = getTrajectory({ sessionId, db });
    expect(steps[0].inputTokens).toBe(1000);
    expect(steps[0].outputTokens).toBe(500);
    expect(steps[0].stepCost).toBe(0.015);
  });

  // stepType filter returns only matching rows.
  test("getTrajectory_filters_by_type", () => {
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: "a",
      db,
    });
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.LLM_CALL,
      llmModel: "x",
      db,
    });
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: "b",
      db,
    });

    const toolSteps = getTrajectory({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      db,
    });
    expect(toolSteps.length).toBe(2);
    expect(toolSteps.every((s) => s.stepType === "tool_call")).toBe(true);
  });

  // Summary aggregates counts, tokens, cost, and distinct tool names.
  test("getTrajectorySummary_returns_stats", () => {
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: "a",
      inputTokens: 100,
      outputTokens: 50,
      stepCost: 0.01,
      db,
    });
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: "b",
      inputTokens: 200,
      outputTokens: 100,
      stepCost: 0.02,
      db,
    });
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.ERROR,
      isError: true,
      db,
    });

    const summary = getTrajectorySummary(sessionId, null, db);
    expect(summary.totalSteps).toBe(3);
    expect(summary.errorCount).toBe(1);
    expect(summary.totalInputTokens).toBe(300);
    expect(summary.totalOutputTokens).toBe(150);
    // toBeCloseTo: step costs are floats (0.01 + 0.02 is not exactly 0.03).
    expect(summary.totalCost).toBeCloseTo(0.03, 4);
    expect(summary.uniqueTools).toBe(2);
  });

  // Formatted output contains the header plus per-step tool/LLM lines.
  test("formatTrajectory_formats_steps", () => {
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.TOOL_CALL,
      toolName: "readFile",
      inputTokens: 100,
      stepCost: 0.01,
      db,
    });
    recordTrajectoryStep({
      sessionId,
      stepType: STEP_TYPES.LLM_CALL,
      llmModel: "claude",
      inputTokens: 1000,
      outputTokens: 500,
      stepCost: 0.015,
      db,
    });

    const steps = getTrajectory({ sessionId, db });
    const formatted = formatTrajectory(steps);
    expect(formatted).toContain("Trajectory (2 steps total");
    expect(formatted).toContain("Tool: readFile");
    expect(formatted).toContain("LLM: claude");
  });

  // clearTrajectory reports the number of deleted rows and empties the session.
  test("clearTrajectory_deletes_steps", () => {
    recordTrajectoryStep({ sessionId, stepType: STEP_TYPES.TOOL_CALL, db });
    const deleted = clearTrajectory(sessionId, db);
    expect(deleted).toBe(1);

    const steps = getTrajectory({ sessionId, db });
    expect(steps.length).toBe(0);
  });

  // Unknown step types are rejected (returns null, nothing inserted).
  test("recordTrajectoryStep_rejects_invalid_type", () => {
    const result = recordTrajectoryStep({
      sessionId,
      stepType: "invalid_type",
      db,
    });
    expect(result).toBeNull();
  });

  // sessionId is mandatory; omission returns null rather than throwing.
  test("recordTrajectoryStep_requires_sessionId", () => {
    const result = recordTrajectoryStep({
      stepType: STEP_TYPES.TOOL_CALL,
      db,
    });
    expect(result).toBeNull();
  });
});
|
||||
83
src/resources/extensions/sf/trajectory-command.js
Normal file
83
src/resources/extensions/sf/trajectory-command.js
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
/**
|
||||
* Trajectory Command — /trajectory handler for SF.
|
||||
*
|
||||
* Purpose: Display step-by-step execution trace with costs, errors,
|
||||
* and tool usage for the current session or a specific unit.
|
||||
*
|
||||
* Consumer: ops.js command dispatcher.
|
||||
*/
|
||||
|
||||
import { getDatabase, isDbAvailable } from "./sf-db.js";
|
||||
import {
|
||||
formatTrajectory,
|
||||
getTrajectory,
|
||||
getTrajectorySummary,
|
||||
} from "./trajectory-recorder.js";
|
||||
|
||||
/**
 * Handle the /trajectory command.
 *
 * Flags:
 *   --all       show steps from every session (not just the current one)
 *   --errors    show only error steps
 *   --tools     show only tool_call steps
 *   --llm       show only llm_call steps
 *   --limit=N   cap the number of steps displayed (default 50)
 *
 * @param {string} args — command arguments
 * @param {object} ctx — command context
 * @param {string} basePath — project root (unused here; kept for the
 *   dispatcher's uniform handler signature)
 */
export async function handleTrajectory(args, ctx, basePath) {
  if (!isDbAvailable()) {
    ctx.ui.notify(
      "Trajectory recording requires a database connection.",
      "warning",
    );
    return;
  }

  const db = getDatabase();
  const sessionId = ctx.sessionManager?.getSessionId?.() || "default";

  // Parse flags
  const flags = args.split(/\s+/).filter(Boolean);
  const showAll = flags.includes("--all");
  const showErrors = flags.includes("--errors");
  const showTools = flags.includes("--tools");
  const showLLM = flags.includes("--llm");

  // Scan for --limit=N once (previously the flag list was searched twice)
  // and guard against malformed values: parseInt("--limit=abc") yields NaN,
  // which would otherwise flow straight into the SQL LIMIT binding.
  const limitFlag = flags.find((f) => f.startsWith("--limit="));
  const parsedLimit = limitFlag
    ? Number.parseInt(limitFlag.split("=")[1], 10)
    : NaN;
  const limit =
    Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 50;

  // Type filters are mutually exclusive; first match wins.
  const filter = {};
  if (showErrors) filter.stepType = "error";
  else if (showTools) filter.stepType = "tool_call";
  else if (showLLM) filter.stepType = "llm_call";

  const steps = getTrajectory({
    sessionId: showAll ? undefined : sessionId,
    ...filter,
    limit,
    db,
  });

  if (steps.length === 0) {
    ctx.ui.notify("No trajectory steps recorded yet.", "info");
    return;
  }

  // Summary is always scoped to the current session, even with --all.
  const summary = getTrajectorySummary(sessionId, null, db);

  // Build output
  const lines = [];
  lines.push(`Trajectory for session ${sessionId.slice(0, 8)}`);

  if (summary) {
    lines.push(
      `Steps: ${summary.totalSteps} | Errors: ${summary.errorCount} | ` +
        `Tokens: ${summary.totalInputTokens + summary.totalOutputTokens} | ` +
        `Cost: $${summary.totalCost.toFixed(4)} | Tools: ${summary.uniqueTools}`,
    );
  }

  lines.push("");
  lines.push(formatTrajectory(steps, { maxSteps: limit }));

  ctx.ui.notify(lines.join("\n"), "info");
}
|
||||
399
src/resources/extensions/sf/trajectory-recorder.js
Normal file
399
src/resources/extensions/sf/trajectory-recorder.js
Normal file
|
|
@ -0,0 +1,399 @@
|
|||
/**
|
||||
* Trajectory Recorder — Step-by-step execution trace for SF units.
|
||||
*
|
||||
* Purpose: Provide RA.Aid-style trajectory recording with per-step
|
||||
* tool execution details, costs, tokens, errors, and results.
|
||||
*
|
||||
* Consumer: research units, planning units, execute-task units, /trajectory command.
|
||||
*
|
||||
* Design:
|
||||
* - SQLite-backed with JSONB-like flexibility for tool params/results
|
||||
* - Session + unit scoped
|
||||
* - Cost and token tracking per step
|
||||
* - Error recording with full context
|
||||
* - Exportable for analysis and debugging
|
||||
*/
|
||||
|
||||
import { debugLog } from "./debug-logger.js";
import { getDatabase, isDbAvailable } from "./sf-db.js";
import { logWarning } from "./workflow-logger.js";
|
||||
|
||||
const TRAJECTORY_TABLE = "sf_trajectory";

/**
 * Trajectory step types.
 *
 * Mirrors the CHECK constraint on sf_trajectory.step_type — keep the two in
 * sync. Frozen so callers cannot accidentally mutate the shared constant.
 */
export const STEP_TYPES = Object.freeze({
  TOOL_CALL: "tool_call",
  TOOL_RESULT: "tool_result",
  LLM_CALL: "llm_call",
  LLM_RESPONSE: "llm_response",
  USER_INPUT: "user_input",
  STAGE_TRANSITION: "stage_transition",
  ERROR: "error",
  CHECKPOINT: "checkpoint",
});
|
||||
|
||||
/**
 * Ensure the trajectory table exists.
 *
 * Idempotent: uses CREATE TABLE/INDEX IF NOT EXISTS, so the other exports in
 * this module call it before every read/write without cost concerns.
 *
 * @param {object} db — connection exposing exec() (better-sqlite3-style —
 *   presumably; confirm against sf-db.js)
 * @returns {boolean} true when the DDL ran, false when db is falsy or exec threw
 */
export function ensureTrajectoryTable(db) {
  if (!db) return false;
  try {
    // Multi-statement DDL: the table plus covering indexes for the common
    // query axes (session, unit, step type, recency).
    db.exec(`
      CREATE TABLE IF NOT EXISTS ${TRAJECTORY_TABLE} (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT NOT NULL,
        unit_id TEXT,
        step_type TEXT NOT NULL CHECK(step_type IN ('tool_call', 'tool_result', 'llm_call', 'llm_response', 'user_input', 'stage_transition', 'error', 'checkpoint')),
        step_number INTEGER NOT NULL,
        tool_name TEXT,
        tool_params TEXT, -- JSON
        tool_result TEXT, -- JSON or text
        llm_model TEXT,
        llm_prompt TEXT,
        llm_response TEXT,
        input_tokens INTEGER,
        output_tokens INTEGER,
        step_cost REAL,
        is_error INTEGER NOT NULL DEFAULT 0,
        error_message TEXT,
        error_type TEXT,
        error_details TEXT,
        metadata TEXT, -- JSON: {duration_ms, retry_count, etc}
        created_at TEXT NOT NULL DEFAULT (datetime('now'))
      );
      CREATE INDEX IF NOT EXISTS idx_trajectory_session ON ${TRAJECTORY_TABLE}(session_id);
      CREATE INDEX IF NOT EXISTS idx_trajectory_unit ON ${TRAJECTORY_TABLE}(unit_id);
      CREATE INDEX IF NOT EXISTS idx_trajectory_type ON ${TRAJECTORY_TABLE}(step_type);
      CREATE INDEX IF NOT EXISTS idx_trajectory_created ON ${TRAJECTORY_TABLE}(created_at DESC);
    `);
    return true;
  } catch (err) {
    // Best-effort: callers treat false as "recording unavailable" rather
    // than an error condition.
    logWarning("trajectory", "Failed to ensure trajectory table", {
      error: String(err),
    });
    return false;
  }
}
|
||||
|
||||
// In-memory per-session step sequence: sessionId -> last issued step number.
// Note: process-local only; it is not persisted alongside the table.
const stepCounter = new Map();

// Issue the next 1-based step number for a session.
function getNextStepNumber(sessionId) {
  const next = (stepCounter.get(sessionId) ?? 0) + 1;
  stepCounter.set(sessionId, next);
  return next;
}

// Restart a session's numbering so the next step is 1 again.
function resetStepCounter(sessionId) {
  stepCounter.set(sessionId, 0);
}
|
||||
|
||||
/**
 * Record a trajectory step.
 *
 * @param {object} params
 * @param {string} params.sessionId — required
 * @param {string} [params.unitId] — optional unit tag
 * @param {string} params.stepType — STEP_TYPES value
 * @param {string} [params.toolName] — tool name for tool_call/tool_result
 * @param {object} [params.toolParams] — tool parameters
 * @param {any} [params.toolResult] — tool result
 * @param {string} [params.llmModel] — model identifier
 * @param {string} [params.llmPrompt] — prompt text
 * @param {string} [params.llmResponse] — response text
 * @param {number} [params.inputTokens] — input token count
 * @param {number} [params.outputTokens] — output token count
 * @param {number} [params.stepCost] — cost in USD
 * @param {boolean} [params.isError=false] — whether this step errored
 * @param {string} [params.errorMessage] — error message
 * @param {string} [params.errorType] — error type/class
 * @param {string} [params.errorDetails] — detailed error info
 * @param {object} [params.metadata] — additional metadata
 * @param {object} [params.db] — database connection
 * @returns {{id: number, stepNumber: number}|null} inserted row id and
 *   1-based step number, or null on validation/db failure
 */
export function recordTrajectoryStep({
  sessionId,
  unitId,
  stepType,
  toolName,
  toolParams,
  toolResult,
  llmModel,
  llmPrompt,
  llmResponse,
  inputTokens,
  outputTokens,
  stepCost,
  isError = false,
  errorMessage,
  errorType,
  errorDetails,
  metadata,
  db,
}) {
  if (!sessionId || !stepType) {
    logWarning("trajectory", "recordTrajectoryStep missing required fields", {
      sessionId,
      stepType,
    });
    return null;
  }
  if (!Object.values(STEP_TYPES).includes(stepType)) {
    logWarning("trajectory", "Invalid step type", { stepType });
    return null;
  }

  // Fall back to the shared connection. FIX: this previously used
  // `require("./sf-db.js").getDb()`, which fails in an ES module (no
  // `require`) and names a function sf-db.js does not export — the
  // accessor used everywhere else is `getDatabase`.
  const dbConn = db ?? (isDbAvailable() ? getDatabase() : null);
  if (!dbConn) return null;

  ensureTrajectoryTable(dbConn);

  const stepNumber = getNextStepNumber(sessionId);

  try {
    const result = dbConn
      .prepare(
        `INSERT INTO ${TRAJECTORY_TABLE} (
          session_id, unit_id, step_type, step_number, tool_name,
          tool_params, tool_result, llm_model, llm_prompt, llm_response,
          input_tokens, output_tokens, step_cost, is_error,
          error_message, error_type, error_details, metadata
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      )
      .run(
        sessionId,
        unitId || null,
        stepType,
        stepNumber,
        toolName || null,
        toolParams ? JSON.stringify(toolParams) : null,
        // Strings are stored verbatim; anything else is JSON-encoded.
        toolResult !== undefined
          ? typeof toolResult === "string"
            ? toolResult
            : JSON.stringify(toolResult)
          : null,
        llmModel || null,
        llmPrompt || null,
        llmResponse || null,
        // ?? rather than ||: a legitimate 0 token count / zero cost is
        // stored as 0, not silently converted to NULL.
        inputTokens ?? null,
        outputTokens ?? null,
        stepCost ?? null,
        isError ? 1 : 0,
        errorMessage || null,
        errorType || null,
        errorDetails || null,
        metadata ? JSON.stringify(metadata) : null,
      );

    return { id: Number(result.lastInsertRowid), stepNumber };
  } catch (err) {
    logWarning("trajectory", "recordTrajectoryStep failed", {
      error: String(err),
      sessionId,
      stepType,
    });
    return null;
  }
}
|
||||
|
||||
/**
 * Get trajectory steps with optional filtering.
 *
 * @param {object} params
 * @param {string} [params.sessionId] — restrict to one session; omit/undefined
 *   returns steps across all sessions (used by /trajectory --all)
 * @param {string} [params.unitId] — restrict to one unit
 * @param {string} [params.stepType] — restrict to one STEP_TYPES value
 * @param {number} [params.limit=100] — maximum rows returned
 * @param {object} [params.db] — database connection
 * @returns {Array<object>} camelCase step rows ordered by step_number ASC;
 *   empty array when no db is available or the query fails
 */
export function getTrajectory({
  sessionId,
  unitId,
  stepType,
  limit = 100,
  db,
}) {
  // FIX: was `require("./sf-db.js").getDb()` — `require` is unavailable in
  // an ES module and sf-db.js exports `getDatabase`, not `getDb`.
  const dbConn = db ?? (isDbAvailable() ? getDatabase() : null);
  if (!dbConn) return [];

  ensureTrajectoryTable(dbConn);

  try {
    // Build WHERE only from filters actually provided. sessionId is now
    // optional: previously `session_id = ?` was always emitted, so an
    // undefined sessionId (the --all path) produced an undefined binding.
    const conditions = [];
    const params = [];

    if (sessionId) {
      conditions.push("session_id = ?");
      params.push(sessionId);
    }
    if (unitId) {
      conditions.push("unit_id = ?");
      params.push(unitId);
    }
    if (stepType) {
      conditions.push("step_type = ?");
      params.push(stepType);
    }

    const whereClause =
      conditions.length > 0 ? conditions.join(" AND ") : "1 = 1";
    params.push(limit);

    const rows = dbConn
      .prepare(
        `SELECT * FROM ${TRAJECTORY_TABLE}
         WHERE ${whereClause}
         ORDER BY step_number ASC
         LIMIT ?`,
      )
      .all(...params);

    // Map snake_case columns to the camelCase shape callers expect.
    // tryParseJson (not bare JSON.parse) so one corrupt row degrades to its
    // raw string instead of aborting the whole result set.
    return rows.map((r) => ({
      id: r.id,
      sessionId: r.session_id,
      unitId: r.unit_id,
      stepType: r.step_type,
      stepNumber: r.step_number,
      toolName: r.tool_name,
      toolParams: r.tool_params ? tryParseJson(r.tool_params) : null,
      toolResult: r.tool_result ? tryParseJson(r.tool_result) : null,
      llmModel: r.llm_model,
      llmPrompt: r.llm_prompt,
      llmResponse: r.llm_response,
      inputTokens: r.input_tokens,
      outputTokens: r.output_tokens,
      stepCost: r.step_cost,
      isError: !!r.is_error,
      errorMessage: r.error_message,
      errorType: r.error_type,
      errorDetails: r.error_details,
      metadata: r.metadata ? tryParseJson(r.metadata) : null,
      createdAt: r.created_at,
    }));
  } catch (err) {
    logWarning("trajectory", "getTrajectory failed", {
      error: String(err),
      sessionId,
    });
    return [];
  }
}
|
||||
|
||||
/**
 * Parse a string as JSON; fall back to returning the raw string when it is
 * not valid JSON (used to tolerate legacy/corrupt rows).
 */
function tryParseJson(str) {
  let value;
  try {
    value = JSON.parse(str);
  } catch {
    value = str;
  }
  return value;
}
|
||||
|
||||
/**
 * Get trajectory summary for a session/unit.
 *
 * @param {string} sessionId — session to summarize
 * @param {string|null} unitId — optional unit filter (null for all units)
 * @param {object} [db] — database connection
 * @returns {{totalSteps:number, errorCount:number, totalInputTokens:number,
 *   totalOutputTokens:number, totalCost:number, uniqueTools:number,
 *   maxStep:number|null}|null} aggregate stats, or null on failure/no db
 */
export function getTrajectorySummary(sessionId, unitId, db) {
  // FIX: was `require("./sf-db.js").getDb()` — `require` is unavailable in
  // an ES module and sf-db.js exports `getDatabase`, not `getDb`.
  const dbConn = db ?? (isDbAvailable() ? getDatabase() : null);
  if (!dbConn) return null;

  ensureTrajectoryTable(dbConn);

  try {
    const conditions = ["session_id = ?"];
    const params = [sessionId];

    if (unitId) {
      conditions.push("unit_id = ?");
      params.push(unitId);
    }

    const whereClause = conditions.join(" AND ");

    const stats = dbConn
      .prepare(
        `SELECT
          COUNT(*) as total_steps,
          SUM(CASE WHEN is_error = 1 THEN 1 ELSE 0 END) as error_count,
          SUM(input_tokens) as total_input_tokens,
          SUM(output_tokens) as total_output_tokens,
          SUM(step_cost) as total_cost,
          COUNT(DISTINCT tool_name) as unique_tools,
          MAX(step_number) as max_step
        FROM ${TRAJECTORY_TABLE}
        WHERE ${whereClause}`,
      )
      .get(...params);

    return {
      totalSteps: stats.total_steps,
      // SQLite SUM() yields NULL over zero rows — normalize to 0 so callers
      // can interpolate these into strings without seeing "null".
      errorCount: stats.error_count ?? 0,
      totalInputTokens: stats.total_input_tokens || 0,
      totalOutputTokens: stats.total_output_tokens || 0,
      totalCost: stats.total_cost || 0,
      uniqueTools: stats.unique_tools,
      maxStep: stats.max_step,
    };
  } catch (err) {
    logWarning("trajectory", "getTrajectorySummary failed", {
      error: String(err),
    });
    return null;
  }
}
|
||||
|
||||
/**
 * Format trajectory for display/export.
 *
 * @param {Array<object>} steps — camelCase steps from getTrajectory()
 * @param {object} [options]
 * @param {number} [options.maxSteps=50] — show only the last N steps
 * @param {boolean} [options.includeDetails=true] — include params/error details
 * @returns {string} multi-line human-readable trace
 */
export function formatTrajectory(steps, options = {}) {
  if (!steps || steps.length === 0) return "No trajectory steps recorded.";

  const { maxSteps = 50, includeDetails = true } = options;
  const displaySteps = steps.slice(-maxSteps);

  const lines = [
    `Trajectory (${steps.length} steps total, showing last ${displaySteps.length}):`,
  ];

  for (const step of displaySteps) {
    const prefix = step.isError ? "❌" : "✓";
    const cost = step.stepCost ? ` ($${step.stepCost.toFixed(4)})` : "";
    // FIX: default a missing side to 0 — previously a step with only
    // inputTokens rendered "[100/undefined tokens]".
    const tokens =
      step.inputTokens || step.outputTokens
        ? ` [${step.inputTokens ?? 0}/${step.outputTokens ?? 0} tokens]`
        : "";

    if (step.stepType === "tool_call") {
      lines.push(
        `${prefix} [${step.stepNumber}] Tool: ${step.toolName}${cost}${tokens}`,
      );
      if (includeDetails && step.toolParams) {
        // FIX: decide truncation from the full string's length — the old
        // check ran on the already-sliced string, so exactly-200-char
        // params got a spurious "...".
        const raw = JSON.stringify(step.toolParams);
        const shown = raw.slice(0, 200);
        lines.push(`  Params: ${shown}${raw.length > 200 ? "..." : ""}`);
      }
    } else if (step.stepType === "llm_call") {
      lines.push(
        `${prefix} [${step.stepNumber}] LLM: ${step.llmModel}${cost}${tokens}`,
      );
    } else if (step.stepType === "error") {
      lines.push(`${prefix} [${step.stepNumber}] Error: ${step.errorMessage}`);
      if (includeDetails && step.errorDetails) {
        lines.push(`  Details: ${step.errorDetails.slice(0, 300)}`);
      }
    } else {
      lines.push(
        `${prefix} [${step.stepNumber}] ${step.stepType}${cost}${tokens}`,
      );
    }
  }

  return lines.join("\n");
}
|
||||
|
||||
/**
 * Clear trajectory for a session.
 *
 * @param {string} sessionId — session whose steps are deleted
 * @param {object} [db] — database connection
 * @returns {number} rows deleted (0 when no db is available or on failure)
 */
export function clearTrajectory(sessionId, db) {
  // FIX: was `require("./sf-db.js").getDb()` — `require` is unavailable in
  // an ES module and sf-db.js exports `getDatabase`, not `getDb`.
  const dbConn = db ?? (isDbAvailable() ? getDatabase() : null);
  if (!dbConn) return 0;

  ensureTrajectoryTable(dbConn);

  try {
    const result = dbConn
      .prepare(`DELETE FROM ${TRAJECTORY_TABLE} WHERE session_id = ?`)
      .run(sessionId);
    // Restart numbering so a reused session id begins again at step 1.
    resetStepCounter(sessionId);
    return result.changes;
  } catch (err) {
    logWarning("trajectory", "clearTrajectory failed", { error: String(err) });
    return 0;
  }
}
|
||||
|
|
@ -110,7 +110,10 @@ export async function enrichGateResultWithMemory(gateResult, gateId) {
|
|||
}
|
||||
} catch (err) {
|
||||
// Degrade gracefully - memory enrichment never changes gate result
|
||||
logWarning("gate-runner", `Memory enrichment failed for gate ${gateId}: ${err instanceof Error ? err.message : String(err)}`);
|
||||
logWarning(
|
||||
"gate-runner",
|
||||
`Memory enrichment failed for gate ${gateId}: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
}
|
||||
|
||||
return gateResult;
|
||||
|
|
|
|||
|
|
@ -88,7 +88,9 @@ export function createTurnObserver(options) {
|
|||
permissionProfile: current.permissionProfile,
|
||||
}),
|
||||
}).catch((err) => {
|
||||
console.error(`[loop-adapter] Git transaction failed: ${err.message}`);
|
||||
console.error(
|
||||
`[loop-adapter] Git transaction failed: ${err.message}`,
|
||||
);
|
||||
});
|
||||
}
|
||||
if (options.enableAudit) {
|
||||
|
|
@ -133,7 +135,9 @@ export function createTurnObserver(options) {
|
|||
status: "ok",
|
||||
metadata: nextSequenceMetadata("gitops", "update", { action }),
|
||||
}).catch((err) => {
|
||||
console.error(`[loop-adapter] Git transaction failed: ${err.message}`);
|
||||
console.error(
|
||||
`[loop-adapter] Git transaction failed: ${err.message}`,
|
||||
);
|
||||
});
|
||||
}
|
||||
if (phase === "unit") {
|
||||
|
|
@ -149,7 +153,9 @@ export function createTurnObserver(options) {
|
|||
status: "ok",
|
||||
metadata: nextSequenceMetadata("gitops", "update", { action }),
|
||||
}).catch((err) => {
|
||||
console.error(`[loop-adapter] Git transaction failed: ${err.message}`);
|
||||
console.error(
|
||||
`[loop-adapter] Git transaction failed: ${err.message}`,
|
||||
);
|
||||
});
|
||||
}
|
||||
if (phase === "finalize") {
|
||||
|
|
@ -165,7 +171,9 @@ export function createTurnObserver(options) {
|
|||
status: "ok",
|
||||
metadata: nextSequenceMetadata("gitops", "update", { action }),
|
||||
}).catch((err) => {
|
||||
console.error(`[loop-adapter] Git transaction failed: ${err.message}`);
|
||||
console.error(
|
||||
`[loop-adapter] Git transaction failed: ${err.message}`,
|
||||
);
|
||||
});
|
||||
}
|
||||
},
|
||||
|
|
|
|||
|
|
@ -94,7 +94,9 @@ export function parseParityEvents(raw) {
|
|||
})
|
||||
.filter(Boolean);
|
||||
if (malformedCount > 0) {
|
||||
console.error(`[parity-report] Dropped ${malformedCount} malformed parity event(s)`);
|
||||
console.error(
|
||||
`[parity-report] Dropped ${malformedCount} malformed parity event(s)`,
|
||||
);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,7 +74,9 @@ function detectCycles(nodes) {
|
|||
}
|
||||
}
|
||||
}
|
||||
const queue = nodes.filter((n) => (inDegree.get(n.id) ?? 0) === 0).map((n) => n.id);
|
||||
const queue = nodes
|
||||
.filter((n) => (inDegree.get(n.id) ?? 0) === 0)
|
||||
.map((n) => n.id);
|
||||
let visited = 0;
|
||||
while (queue.length > 0) {
|
||||
const current = queue.shift();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue