sf snapshot: uncommitted changes after 131m inactivity
parent 5188b93ddc
commit 9875812c1b
44 changed files with 1149 additions and 87 deletions
3  .sf/backups/db/maintenance.json  Normal file
@@ -0,0 +1,3 @@
+{
+  "lastFullVacuumAt": "2026-05-08T20:15:21.317Z"
+}
BIN  .sf/backups/db/sf.db.2026-05-08T20-20-34-822Z  Normal file (binary file not shown)
BIN  .sf/backups/db/sf.db.2026-05-08T20-44-13-669Z  Normal file (binary file not shown)
BIN  .sf/backups/db/sf.db.2026-05-08T22-14-57-817Z  Normal file (binary file not shown)
BIN  .sf/backups/db/sf.db.2026-05-08T22-42-32-307Z  Normal file (binary file not shown)
BIN  .sf/backups/db/sf.db.20260508-220250  Normal file (binary file not shown)
BIN  .sf/recovery/sf.db-shm.corrupt-20260508-220021  Normal file (binary file not shown)
BIN  .sf/recovery/sf.db-shm.replaced-corrupt-20260508-220115  Normal file (binary file not shown)
0  .sf/recovery/sf.db-wal.corrupt-20260508-220021  Normal file
0  .sf/recovery/sf.db-wal.replaced-corrupt-20260508-220115  Normal file
BIN  .sf/recovery/sf.db.corrupt-20260508-220021  Normal file (binary file not shown)
BIN  .sf/recovery/sf.db.recovered-20260508-220103  Normal file (binary file not shown)
BIN  .sf/recovery/sf.db.replaced-corrupt-20260508-220115  Normal file (binary file not shown)
@@ -0,0 +1,3 @@
+{
+  "lastFullVacuumAt": "2026-05-08T20:29:49.200Z"
+}
BIN  .sf/recovery/stray-root-20260509-023724/global  Normal file (binary file not shown)
15  package-lock.json  generated
@@ -5708,6 +5708,10 @@
 "node_modules/@singularity-forge/engine-win32-x64-msvc": {
   "optional": true
 },
+"node_modules/@singularity-forge/google-gemini-cli-provider": {
+  "resolved": "packages/google-gemini-cli-provider",
+  "link": true
+},
 "node_modules/@singularity-forge/native": {
   "resolved": "packages/native",
   "link": true
@@ -14618,6 +14622,16 @@
 "url": "https://github.com/sponsors/colinhacks"
 }
 },
+"packages/google-gemini-cli-provider": {
+  "name": "@singularity-forge/google-gemini-cli-provider",
+  "version": "2.75.3",
+  "dependencies": {
+    "@google/gemini-cli-core": "0.40.1"
+  },
+  "engines": {
+    "node": ">=26.1.0"
+  }
+},
 "packages/native": {
   "name": "@singularity-forge/native",
   "version": "2.75.3",
@@ -14651,6 +14665,7 @@
 "@google/genai": "^1.40.0",
 "@mistralai/mistralai": "^2.2.1",
 "@sinclair/typebox": "^0.34.41",
+"@singularity-forge/google-gemini-cli-provider": "^2.75.3",
 "ajv": "^8.17.1",
 "ajv-formats": "^3.0.1",
 "chalk": "^5.6.2",
@@ -48,7 +48,8 @@
 "build:pi-coding-agent": "npm --workspace @singularity-forge/pi-coding-agent run build",
 "build:native-pkg": "npm --workspace @singularity-forge/native run build",
 "build:rpc-client": "npm --workspace @singularity-forge/rpc-client run build",
-"build:pi": "npm run build:native-pkg && npm run build:pi-tui && npm run build:pi-ai && npm run build:pi-agent-core && npm run build:pi-coding-agent",
+"build:google-gemini-cli-provider": "npm --workspace @singularity-forge/google-gemini-cli-provider run build",
+"build:pi": "npm run build:native-pkg && npm run build:pi-tui && npm run build:google-gemini-cli-provider && npm run build:pi-ai && npm run build:pi-agent-core && npm run build:pi-coding-agent",
 "build:daemon": "npm --workspace @singularity-forge/daemon run build",
 "build:core": "npm run build:pi && npm run build:rpc-client && npm run build:daemon && npm run check:versioned-json && tsc && npm run copy-resources && npm run copy-themes && npm run copy-export-html",
 "build": "npm run build:core && node scripts/build-web-if-stale.cjs",
23  packages/google-gemini-cli-provider/package.json  Normal file
@@ -0,0 +1,23 @@
+{
+  "name": "@singularity-forge/google-gemini-cli-provider",
+  "version": "2.75.3",
+  "description": "Gemini CLI Core transport helper for SF providers",
+  "type": "module",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "tsc -p tsconfig.json"
+  },
+  "dependencies": {
+    "@google/gemini-cli-core": "0.40.1"
+  },
+  "engines": {
+    "node": ">=26.1.0"
+  }
+}
40  packages/google-gemini-cli-provider/src/index.test.ts  Normal file
@@ -0,0 +1,40 @@
+import assert from "node:assert/strict";
+import { describe, test, vi } from "vitest";
+
+const helperState = vi.hoisted(() => ({
+  authType: undefined as unknown,
+  configParams: undefined as Record<string, unknown> | undefined,
+}));
+
+vi.mock("@google/gemini-cli-core", () => ({
+  AuthType: { LOGIN_WITH_GOOGLE: "LOGIN_WITH_GOOGLE" },
+  makeFakeConfig: vi.fn((params: Record<string, unknown>) => {
+    helperState.configParams = params;
+    return { params };
+  }),
+}));
+
+vi.mock("@google/gemini-cli-core/dist/src/core/contentGenerator.js", () => ({
+  createContentGeneratorConfig: vi.fn(async (_config, authType) => {
+    helperState.authType = authType;
+    return { authType };
+  }),
+  createContentGenerator: vi.fn(async () => ({
+    async generateContentStream(): Promise<AsyncGenerator<unknown>> {
+      return (async function* emptyStream() {})();
+    },
+  })),
+}));
+
+import { createGeminiCliContentGenerator } from "./index.js";
+
+describe("google-gemini-cli-provider", () => {
+  test("createGeminiCliContentGenerator_uses_google_login_auth", async () => {
+    await createGeminiCliContentGenerator({ modelId: "gemini-3-pro" });
+
+    assert.equal(helperState.authType, "LOGIN_WITH_GOOGLE");
+    assert.equal(helperState.configParams?.model, "gemini-3-pro");
+    assert.equal(helperState.configParams?.cwd, process.cwd());
+    assert.equal(helperState.configParams?.targetDir, process.cwd());
+  });
+});
48  packages/google-gemini-cli-provider/src/index.ts  Normal file
@@ -0,0 +1,48 @@
+/**
+ * Google Gemini CLI transport helper.
+ *
+ * Purpose: keep the Gemini CLI Core auth and content-generator wiring in a
+ * dedicated workspace package so provider code can depend on one small helper
+ * instead of embedding the upstream integration inline.
+ *
+ * Consumer: `@singularity-forge/pi-ai` Google Gemini provider.
+ */
+import {
+  AuthType,
+  makeFakeConfig,
+} from "@google/gemini-cli-core";
+import {
+  createContentGenerator,
+  createContentGeneratorConfig,
+  type ContentGenerator,
+} from "@google/gemini-cli-core/dist/src/core/contentGenerator.js";
+
+export interface GeminiCliContentGeneratorOptions {
+  modelId: string;
+  cwd?: string;
+  targetDir?: string;
+}
+
+/**
+ * Create a Gemini CLI Core content generator for a model.
+ *
+ * Purpose: centralize the Code Assist setup and OAuth bootstrap logic in a
+ * reusable package so SF's Gemini provider can stay focused on stream shaping.
+ *
+ * Consumer: the Google Gemini provider in pi-ai.
+ */
+export async function createGeminiCliContentGenerator(
+  options: GeminiCliContentGeneratorOptions,
+): Promise<ContentGenerator> {
+  const cwd = options.cwd ?? process.cwd();
+  const config = makeFakeConfig({
+    model: options.modelId,
+    cwd,
+    targetDir: options.targetDir ?? cwd,
+  });
+  const generatorConfig = await createContentGeneratorConfig(
+    config,
+    AuthType.LOGIN_WITH_GOOGLE,
+  );
+  return createContentGenerator(generatorConfig, config);
+}
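A minimal usage sketch for the new helper (the call site below is hypothetical; only the exported function and its options shape come from this commit):

```ts
import { createGeminiCliContentGenerator } from "@singularity-forge/google-gemini-cli-provider";

// Assumes ~/.gemini/oauth_creds.json exists, or that cli-core can run its
// browser OAuth bootstrap on first use.
const generator = await createGeminiCliContentGenerator({
  modelId: "gemini-3-pro", // illustrative model id
});

// `generator` implements cli-core's ContentGenerator interface; the pi-ai
// provider drives generator.generateContentStream(...) from here.
```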
28  packages/google-gemini-cli-provider/tsconfig.json  Normal file
@@ -0,0 +1,28 @@
+{
+  "compilerOptions": {
+    "target": "ES2024",
+    "module": "Node16",
+    "lib": ["ES2024"],
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "incremental": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "inlineSources": true,
+    "inlineSourceMap": false,
+    "moduleResolution": "Node16",
+    "resolveJsonModule": true,
+    "allowImportingTsExtensions": false,
+    "experimentalDecorators": true,
+    "emitDecoratorMetadata": true,
+    "useDefineForClassFields": false,
+    "types": ["node"],
+    "outDir": "./dist",
+    "rootDir": "./src"
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist", "**/*.d.ts", "src/**/*.d.ts"]
+}
@@ -29,6 +29,7 @@
 "@google/gemini-cli-core": "0.40.1",
 "@google/genai": "^1.40.0",
 "@mistralai/mistralai": "^2.2.1",
+"@singularity-forge/google-gemini-cli-provider": "^2.75.3",
 "@sinclair/typebox": "^0.34.41",
 "ajv": "^8.17.1",
 "ajv-formats": "^3.0.1",
@@ -5,41 +5,34 @@ import type { Context, Model } from "../types.js";
 const geminiCliCore = vi.hoisted(() => ({
   retryError: undefined as Error | undefined,
   retryOptions: undefined as Record<string, unknown> | undefined,
-  fakeConfigParams: undefined as Record<string, unknown> | undefined,
-  generatorAuthType: undefined as unknown,
+  helperArgs: undefined as Record<string, unknown> | undefined,
 }));
 
 vi.mock("@google/gemini-cli-core", () => ({
   AuthType: { LOGIN_WITH_GOOGLE: "LOGIN_WITH_GOOGLE" },
-  CodeAssistServer: class {
-    async generateContentStream(): Promise<AsyncGenerator<unknown>> {
-      return (async function* emptyStream() {})();
-    }
-  },
-  getOauthClient: vi.fn(async () => ({})),
-  makeFakeConfig: vi.fn((params: Record<string, unknown>) => {
-    geminiCliCore.fakeConfigParams = params;
-    return { params };
-  }),
   retryWithBackoff: vi.fn(
     async (_fn: unknown, options: Record<string, unknown>) => {
       geminiCliCore.retryOptions = options;
       throw geminiCliCore.retryError ?? new Error("quota exhausted");
     },
   ),
-  setupUser: vi.fn(async () => ({ projectId: "test-project" })),
 }));
 
-vi.mock("@google/gemini-cli-core/dist/src/core/contentGenerator.js", () => ({
-  createContentGeneratorConfig: vi.fn(async (_config, authType) => {
-    geminiCliCore.generatorAuthType = authType;
-    return { authType };
-  }),
-  createContentGenerator: vi.fn(async () => ({
-    async generateContentStream(): Promise<AsyncGenerator<unknown>> {
-      return (async function* emptyStream() {})();
-    },
-  })),
+vi.mock("@singularity-forge/google-gemini-cli-provider", () => ({
+  createGeminiCliContentGenerator: vi.fn(
+    async (args: Record<string, unknown>) => {
+      geminiCliCore.helperArgs = args;
+      return {
+        async generateContentStream(): Promise<AsyncGenerator<unknown>> {
+          return (async function* emptyStream() {})();
+        },
+      };
+    },
+  ),
 }));
 
 import { streamGoogleGeminiCli } from "./google-gemini-cli.js";
@@ -82,12 +75,7 @@ describe("google-gemini-cli provider retry ownership", () => {
   | { maxAttempts?: unknown }
   | undefined;
 assert.equal(retryOptions?.maxAttempts, 1);
-assert.equal(
-  geminiCliCore.fakeConfigParams?.model,
-  "gemini-3-flash-preview",
-);
-assert.equal(geminiCliCore.fakeConfigParams?.clientName, undefined);
-assert.equal(geminiCliCore.generatorAuthType, "LOGIN_WITH_GOOGLE");
+assert.equal(geminiCliCore.helperArgs?.modelId, "gemini-3-flash-preview");
 assert.equal(result.stopReason, "error");
 assert.match(result.errorMessage ?? "", /exhausted your capacity/i);
 assert.equal(result.retryAfterMs, 54_000);
@@ -1,24 +1,12 @@
 /**
  * Google Gemini CLI provider.
  *
- * Delegates auth, project discovery, and the Code Assist transport to
- * @google/gemini-cli-core — the library behind Google's Gemini tooling.
- * cli-core reads ~/.gemini/oauth_creds.json itself when present, refreshes tokens,
- * discovers the project (free-tier or whatever's onboarded server-side)
- * via setupUser(), and handles all the User-Agent / quota-classification details.
+ * Delegates auth, project discovery, and the Code Assist transport setup to
+ * the dedicated google-gemini-cli-provider package.
  * Request retry/fallback stays in the caller so SF can move to the next model.
  */
 
-import {
-  AuthType,
-  makeFakeConfig,
-  retryWithBackoff,
-} from "@google/gemini-cli-core";
-import type { ContentGenerator } from "@google/gemini-cli-core/dist/src/core/contentGenerator.js";
-import {
-  createContentGenerator,
-  createContentGeneratorConfig,
-} from "@google/gemini-cli-core/dist/src/core/contentGenerator.js";
+import { retryWithBackoff } from "@google/gemini-cli-core";
 import type {
   Content,
   GenerateContentParameters,
@@ -55,6 +43,7 @@ import {
   isAutoReasoning,
   resolveReasoningLevel,
 } from "./simple-options.js";
+import { createGeminiCliContentGenerator } from "@singularity-forge/google-gemini-cli-provider";
 
 /**
  * Thinking level for Gemini 3 models.
@@ -73,7 +62,8 @@ export type GoogleThinkingLevel =
 /**
  * Options for `streamGoogleGeminiCli()`.
  *
- * Delegates auth to cli-core (reads ~/.gemini/oauth_creds.json via `getOauthClient()`);
+ * Delegates auth to the helper package (reads ~/.gemini/oauth_creds.json via
+ * Gemini CLI Core's transport setup);
  * `projectId` is auto-discovered and not used by this provider (apiKey is ignored).
  * Thinking is configured separately from base `StreamOptions` because Gemini 2 and 3
  * models use incompatible enum formats (budgetTokens vs. level).
@@ -100,30 +90,6 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
 // Counter for generating unique tool call IDs
 let toolCallCounter = 0;
 
-/**
- * Build a Code Assist content generator using cli-core's official content-generator path.
- *
- * Upstream Gemini CLI does not instantiate CodeAssistServer directly from the
- * caller. It creates a ContentGeneratorConfig, lets createContentGenerator()
- * build the GeminiCLI User-Agent and transport headers, then delegates to
- * createCodeAssistContentGenerator() for OAuth, setupUser(), and Code Assist.
- *
- * Both calls memoize internally inside cli-core — repeat invocations are
- * cheap.
- */
-async function getCodeAssistServer(modelId: string): Promise<ContentGenerator> {
-  const config = makeFakeConfig({
-    model: modelId,
-    cwd: process.cwd(),
-    targetDir: process.cwd(),
-  });
-  const generatorConfig = await createContentGeneratorConfig(
-    config,
-    AuthType.LOGIN_WITH_GOOGLE,
-  );
-  return createContentGenerator(generatorConfig, config);
-}
-
 function parseDurationMs(value: string): number | undefined {
   const match = value.match(/(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?/i);
   if (!match || !match[0]) return undefined;
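The duration regex in the context lines above is worth a quick worked check (illustrative only; the rest of the function body is not shown in this diff):

```ts
// Worked check of the duration regex shown above (not code from the commit).
const DURATION_RE = /(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?/i;

const m = "54s".match(DURATION_RE);
// m?.[3] === "54": the hours and minutes groups are optional and match empty.
// 54 seconds -> 54_000 ms, which is the retryAfterMs value the provider test
// asserts after a quota error.
```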
@@ -178,14 +144,14 @@ function isGemini3Model(modelId: string): boolean {
 }
 
 /**
- * Stream a chat completion from Google Gemini via the cli-core transport.
+ * Stream a chat completion from Google Gemini via the helper package and cli-core transport.
  *
- * Auth is handled transparently by cli-core (`getCodeAssistServer()` reads OAuth creds from
- * ~/.gemini/oauth_creds.json and triggers browser OAuth on first run). Project ID is auto-discovered
- * from the Code Assist API; `apiKey` is ignored. Casting the request as `any` works around the fact
- * that cli-core bundles its own nested `@google/genai` copy (nominal type split at packaging time;
- * runtime shapes are byte-identical). Returns a real-time stream emitting start, delta, end, and
- * error events that accumulate into an `AssistantMessage`.
+ * The helper package owns the OAuth/bootstrap path against `@google/gemini-cli-core`, including
+ * `~/.gemini/oauth_creds.json` and Gemini Code Assist project discovery. `apiKey` is ignored.
+ * Casting the request as `any` works around the fact that cli-core bundles its own nested
+ * `@google/genai` copy (nominal type split at packaging time; runtime shapes are byte-identical).
+ * Returns a real-time stream emitting start, delta, end, and error events that accumulate into
+ * an `AssistantMessage`.
  */
 export const streamGoogleGeminiCli: StreamFunction<
   "google-gemini-cli",
@@ -222,9 +188,10 @@ export const streamGoogleGeminiCli: StreamFunction<
 if (nextReq !== undefined) {
   req = nextReq as GenerateContentParameters;
 }
-// cli-core handles auth + project discovery. SF uses cli-core directly
-// and does not spawn a separate provider CLI process.
-const server = await getCodeAssistServer(req.model);
+// cli-core handles auth + project discovery through the helper package.
+const server = await createGeminiCliContentGenerator({
+  modelId: req.model,
+});
 const promptId = `pi-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
 // Cast through `any` — cli-core bundles its own nested @google/genai copy,
 // so TypeScript sees two structurally-identical-but-distinct Content types.
@@ -233,7 +200,6 @@
 const streamGen = await retryWithBackoff(
   () => server.generateContentStream(req as any, promptId, "USER" as any),
   {
-    authType: AuthType.LOGIN_WITH_GOOGLE,
     // SF owns cross-model fallback. Let cli-core classify quota errors,
     // but do not let it hold the turn through its 10-attempt retry loop.
     maxAttempts: 1,
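With `maxAttempts: 1`, retry ownership moves up to the caller. A minimal sketch of caller-owned, cross-model fallback (the model ids and options shape are hypothetical; `streamGoogleGeminiCli`'s exact signature is not shown in this diff):

```ts
// Hypothetical caller-side fallback loop: each model gets one attempt, and
// quota classification (stopReason, retryAfterMs) comes from the provider.
const modelIds = ["gemini-3-pro", "gemini-3-flash-preview"]; // illustrative

for (const modelId of modelIds) {
  const result = await streamGoogleGeminiCli({ ...baseOptions, model: modelId });
  if (result.stopReason !== "error") break; // success: stop falling back
  if (result.retryAfterMs !== undefined) {
    // Optionally respect the provider's retry hint before the next model.
    await new Promise((resolve) => setTimeout(resolve, result.retryAfterMs));
  }
}
```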
@@ -5,15 +5,15 @@ import { parseArgs } from "./args.js";
 describe("parseArgs", () => {
   it("parses optional-value extension flags with implicit and explicit values", () => {
     const extensionFlags = new Map([
-      ["genai-proxy", { type: "string" as const, allowNoValue: true }],
+      ["demo-flag", { type: "string" as const, allowNoValue: true }],
     ]);
-    const defaultFlagArgs = parseArgs(["--genai-proxy"], extensionFlags);
-    const explicitFlagArgs = parseArgs(["--genai-proxy=8080"], extensionFlags);
+    const defaultFlagArgs = parseArgs(["--demo-flag"], extensionFlags);
+    const explicitFlagArgs = parseArgs(["--demo-flag=8080"], extensionFlags);
 
     assert.deepEqual(
       [
-        defaultFlagArgs.unknownFlags.get("genai-proxy"),
-        explicitFlagArgs.unknownFlags.get("genai-proxy"),
+        defaultFlagArgs.unknownFlags.get("demo-flag"),
+        explicitFlagArgs.unknownFlags.get("demo-flag"),
       ],
       [true, "8080"],
     );
@@ -190,7 +190,7 @@ export class Editor implements Component, Focusable {
   private autocompleteDebounceTimer: ReturnType<typeof setTimeout> | null =
     null;
   private lastAutocompleteLookupPrefix: string | null = null;
-  private static readonly AUTOCOMPLETE_DEBOUNCE_MS = 150;
+  private static readonly AUTOCOMPLETE_DEBOUNCE_MS = 50;
 
   // Paste tracking for large pastes
   private pastes: Map<number, string> = new Map();
@@ -94,6 +94,7 @@ if (require.main === module) {
 const WORKSPACE_PACKAGES = [
   "native",
   "pi-tui",
+  "google-gemini-cli-provider",
   "pi-ai",
   "pi-agent-core",
   "pi-coding-agent",
@@ -28,6 +28,7 @@ mkdirSync(piAgentDir, { recursive: true });
 const copied = [];
 if (copyDir("extensions")) copied.push("extensions");
 if (copyDir("skills")) copied.push("skills");
+if (copyDir("workflow-skills")) copied.push("workflow-skills");
 if (copyDir("agents")) copied.push("agents");
 
 const agentsMdSrc = join(resourcesDir, "AGENTS.md");
@@ -36,6 +36,7 @@ const scopeDir = join(root, "node_modules", scope);
 const packageDirs = [
   "native",
   "pi-agent-core",
+  "google-gemini-cli-provider",
   "pi-ai",
   "pi-coding-agent",
   "pi-tui",
@@ -60,6 +60,7 @@ function removeIfContentMatches(targetPath, sourcePath, label) {
 
 removeResourceEntries("extensions");
 removeResourceEntries("skills");
+removeResourceEntries("workflow-skills");
 removeResourceEntries("agents");
 removeIfContentMatches(
   join(piAgentDir, "AGENTS.md"),
@@ -69,6 +69,7 @@ export const TOP_LEVEL_SUBCOMMANDS = [
   { cmd: "queue", desc: "Queue and reorder future milestones" },
   { cmd: "quick", desc: "Execute a quick task without full planning overhead" },
   { cmd: "discuss", desc: "Discuss architecture and decisions" },
+  { cmd: "steer", desc: "Steerable autonomous panel (Shift+Tab)" },
   { cmd: "capture", desc: "Fire-and-forget thought capture" },
   { cmd: "debug", desc: "Create and inspect persistent /debug sessions" },
   { cmd: "scan", desc: "Run source and project scans" },
@@ -19,6 +19,13 @@ export default async function registerExtension(pi) {
   // tools, hooks) fails — e.g. due to a Windows-specific import error.
   const { registerSFCommands } = await import("./commands/index.js");
   registerSFCommands(pi);
+
+  // Register steerable autonomous extension for Copilot Auto-style controls
+  const { default: steerableAutonomousExtension } = await import(
+    "./steerable-autonomous-extension.js"
+  );
+  steerableAutonomousExtension(pi);
 
   // Full setup (shortcuts, tools, hooks) in a separate try/catch so that
   // any platform-specific load failure doesn't take out the core command.
   try {
@@ -11,7 +11,9 @@ import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 
 const SKILL_FILENAME = "SKILL.md";
+export { SKILL_FILENAME };
 const USER_SKILL_DIR = join(process.env.HOME ?? "", ".sf", "skills");
+export { USER_SKILL_DIR };
 const BUNDLED_SKILL_DIR = join(
   dirname(fileURLToPath(import.meta.url)),
   "..",
@@ -19,6 +21,15 @@ const BUNDLED_SKILL_DIR = join(
   "..",
   "skills",
 );
+export { BUNDLED_SKILL_DIR };
+const WORKFLOW_SKILL_DIR = join(
+  dirname(fileURLToPath(import.meta.url)),
+  "..",
+  "..",
+  "..",
+  "workflow-skills",
+);
+export { WORKFLOW_SKILL_DIR };
 
 /**
  * Find all skill directories under a base path.
@@ -41,12 +52,12 @@ export function discoverSkillDirs(basePath) {
 }
 
 /**
- * Discover skills from all sources: project, user, and built-in.
+ * Discover skills from all sources: project, user, built-in, and workflow-internal.
  */
 export function discoverAllSkills(projectPath, options = {}) {
   const sources = [];
 
-  // Bundled SF skills
+  // Bundled SF skills (user-facing, shown in /skills catalog)
   if (options.includeBundled && existsSync(BUNDLED_SKILL_DIR)) {
     const bundledSkills = discoverSkillDirsInRoot(BUNDLED_SKILL_DIR);
     for (const s of bundledSkills) {
@@ -54,6 +65,14 @@ export function discoverAllSkills(projectPath, options = {}) {
     }
   }
 
+  // Workflow-internal skills (hidden from users, injected by the runtime)
+  if (options.includeWorkflow !== false && existsSync(WORKFLOW_SKILL_DIR)) {
+    const workflowSkills = discoverSkillDirsInRoot(WORKFLOW_SKILL_DIR);
+    for (const s of workflowSkills) {
+      sources.push({ ...s, source: "workflow" });
+    }
+  }
+
   // Project skills
   if (projectPath) {
     const projectSkills = discoverSkillDirs(projectPath);
@@ -18,6 +18,7 @@ export {
   readSkillFile,
   SKILL_FILENAME,
   USER_SKILL_DIR,
+  WORKFLOW_SKILL_DIR,
 } from "./directory.js";
 export {
   createEvalCase,
@@ -48,7 +48,7 @@ export function loadSkills(projectPath, options = {}) {
   }
 
   const validation =
-    source === "bundled"
+    source === "bundled" || source === "workflow"
       ? validateBundledSkillFrontmatter(parsed.frontmatter)
       : validateSkillFrontmatter(parsed.frontmatter);
   if (!validation.valid) {
@@ -64,7 +64,10 @@ export function loadSkills(projectPath, options = {}) {
   }
 
   const record = buildSkillRecord(path, parsed.frontmatter, parsed.body);
-  if (
+  if (source === "workflow") {
+    // Workflow-internal skills are never user-invocable regardless of frontmatter
+    record.userInvocable = false;
+  } else if (
     source === "bundled" &&
     parsed.frontmatter["user-invocable"] === undefined
   ) {
@@ -132,7 +135,8 @@ export function getPermittedSkills(skills, activeProfile) {
  */
 export function getUserInvocableSkills(skills) {
   return skills.filter(
-    (s) => s.source === "bundled" && s.valid && s.userInvocable,
+    (s) =>
+      s.source !== "workflow" && s.source === "bundled" && s.valid && s.userInvocable,
   );
 }
 
92  src/resources/workflow-skills/assumption-log/SKILL.md  Normal file
@@ -0,0 +1,92 @@
+---
+name: assumption-log
+description: Document assumptions, proceed with sensible defaults, surface for review at milestones. Use in research and planning workflows where context is incomplete. Blocks the "ask the user every 5 minutes" pattern and the "guess silently and break something" pattern. Every assumption becomes a named, reviewable artifact.
+user-invocable: false
+model-invocable: true
+side-effects: none
+permission-profile: normal
+triggers:
+- plan
+- research
+- "*"
+---
+
+# Assumption Log
+
+## Iron Law
+
+```
+NEVER GUESS SILENTLY.
+NEVER ASK FOR EVERY MISSING DETAIL.
+DOCUMENT THE ASSUMPTION, PICK A SENSIBLE DEFAULT, SURFACE FOR REVIEW.
+```
+
+Silent guessing produces invisible errors. Asking for every missing detail breaks autonomous flow. The correct middle path: make the assumption explicit, pick a defensible default, continue, and surface the log at review gates.
+
+## Recognize Your Own Rationalizations
+
+- "I'll just ask the user." → Ask only when the decision is irreversible or the cost of a wrong assumption is high. For everything else: document and proceed.
+- "I know what they meant." → If you know, document the inference explicitly. If you don't know, document the assumption and the default you chose.
+- "It's obvious — I don't need to write it down." → What is obvious to you during planning is invisible to the reviewer and to your future self. Write it down.
+- "I'll address it when it comes up." → When it comes up, you won't remember what assumption you made. The log is the memory.
+
+## When to Run
+
+- At the start of any research or planning phase with incomplete context
+- When a planning decision depends on information that isn't in the codebase or spec
+- When a scope decision must be made without explicit instruction
+- Before each irreversible op (combine with `irreversible-ops` skill)
+
+## Assumption Entry Format
+
+For each assumption, record:
+
+```
+Assumption ID: A-<NNN>
+Category: <scope | design | dependency | behaviour | constraint>
+Statement: <what you are assuming to be true>
+Basis: <why this default was chosen — evidence, convention, or reasoning>
+Default chosen: <the specific value, behaviour, or approach you will proceed with>
+Confidence: <high | medium | low>
+Falsifier: <what evidence would prove this assumption wrong>
+Review gate: <at what milestone or checkpoint this should be surfaced>
+Impact if wrong: <what breaks if the assumption is incorrect>
+```
+
+**Confidence guidelines:**
+- `high` — strong evidence from code, docs, or established convention; probably correct
+- `medium` — inferred from partial evidence; plausible but should be confirmed
+- `low` — no evidence; pure default; must be confirmed before the affected code ships
+
+## Assumption Categories
+
+**Scope** — what is in/out of this task
+> "Assumption: the email notification feature is out of scope for this slice. Basis: spec says 'user profile update' with no mention of notifications. Default: skip. Review at slice completion."
+
+**Design** — how something should be structured
+> "Assumption: use SQLite for local state storage rather than JSON files. Basis: project uses SQLite everywhere else. Default: SQLite. Confidence: high."
+
+**Dependency** — which version, API, or external behaviour to rely on
+> "Assumption: the gateway API responds within 5 seconds. Basis: no SLA documented; 5s is standard for synchronous APIs. Default: 5s timeout. Confidence: medium."
+
+**Behaviour** — what the system should do in an edge case
+> "Assumption: on parse error, return empty array not null. Basis: existing code uses empty arrays for not-found cases. Default: []. Confidence: high."
+
+**Constraint** — limits on resources, permissions, or side effects
+> "Assumption: this migration is safe to run without a maintenance window. Basis: adds a nullable column, no lock required. Default: proceed without window. Confidence: medium. Falsifier: if table > 10M rows, lock time may matter."
+
+## Review Gate Protocol
+
+At each milestone or slice completion, surface all `medium` and `low` confidence assumptions:
+
+1. List all logged assumptions for the current slice
+2. Mark each: `CONFIRMED` (user or evidence validated it), `REVISED` (different default chosen), or `OPEN` (still unconfirmed)
+3. Any `low` confidence assumption that remains `OPEN` blocks slice completion
+4. Any `medium` confidence assumption that remains `OPEN` is a known risk — document it in the slice evidence
+
+## Completion Criteria
+
+- [ ] All assumptions made during the workflow are logged with full entry format
+- [ ] All `low` confidence assumptions are confirmed or revised before the slice ships
+- [ ] All `medium` confidence assumptions are surfaced at the milestone gate
+- [ ] The assumption log is attached to the slice/task artifacts in `.sf/active/{unit-id}/assumptions.md`
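The entry format in this skill maps naturally onto a typed record. A minimal sketch (the types are hypothetical; the skill itself only specifies the plain-text format):

```ts
// Hypothetical typing of the assumption entry format described above.
type Confidence = "high" | "medium" | "low";
type Category = "scope" | "design" | "dependency" | "behaviour" | "constraint";

interface AssumptionEntry {
  id: `A-${number}`;     // Assumption ID: A-<NNN>
  category: Category;
  statement: string;     // what is assumed to be true
  basis: string;         // why this default was chosen
  defaultChosen: string; // the value, behaviour, or approach used
  confidence: Confidence;
  falsifier: string;     // evidence that would disprove it
  reviewGate: string;    // milestone where it is surfaced
  impactIfWrong: string;
  status?: "CONFIRMED" | "REVISED" | "OPEN";
}

// Gate rule from the Review Gate Protocol: an OPEN low-confidence
// assumption blocks slice completion.
function blocksCompletion(log: AssumptionEntry[]): boolean {
  return log.some(
    (a) => a.confidence === "low" && (a.status ?? "OPEN") === "OPEN",
  );
}
```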
116  src/resources/workflow-skills/context-lean/SKILL.md  Normal file
@@ -0,0 +1,116 @@
+---
+name: context-lean
+description: Prune context before each LLM call. Use in any multi-step workflow that accumulates context across iterations. Less but more relevant context produces better outputs. Prevents context bloat — the single biggest silent quality degrader in long autonomous runs.
+user-invocable: false
+model-invocable: true
+side-effects: none
+permission-profile: normal
+triggers:
+- "*"
+---
+
+# Context Lean
+
+## Iron Law
+
+```
+CONTEXT IS A BUDGET, NOT A DUMP.
+EVERY TOKEN IN CONTEXT MUST EARN ITS PLACE.
+```
+
+Adding more context is not safer than adding less. Irrelevant context degrades output quality by diluting signal. When in doubt, leave it out.
+
+## Recognize Your Own Rationalizations
+
+- "More context can't hurt — it gives the model more to work with." → Wrong. Noise degrades recall. The model attends to everything; irrelevant context steals attention from relevant context.
+- "I'll include the whole file to be safe." → Include only the functions you're actually modifying. The rest is noise.
+- "I need to include the history so the model understands the situation." → Include the summary, not the transcript. Summaries are signal; raw transcripts are noise.
+- "The token limit isn't hit yet, so it's fine." → Token limits are not quality thresholds. Quality degrades well before the limit.
+
+## When to Run
+
+Before any LLM call in a multi-step workflow. Especially:
+- Before each autonomous iteration
+- Before a planning call that synthesizes many inputs
+- After completing a phase (prune phase artifacts before the next phase)
+- When the context window is more than 50% full
+
+## Skill Chain
+
+Inline skill. Run as a pre-call gate before each significant LLM invocation.
+
+```
+← prev: any skill, before its LLM call
+→ next: return to the invoking skill with pruned context
+```
+
+## Pruning Protocol
+
+Apply in order. Stop when the context is lean.
+
+### Step 1 — Remove completed work
+
+Anything that was needed to get to the current state but is not needed to proceed:
+- Completed task details (keep the summary, drop the steps)
+- Resolved errors (keep the fix, drop the stack trace)
+- Superseded plans (keep the current plan, drop the draft)
+
+### Step 2 — Summarize transcripts
+
+Raw conversation history is always worse than a summary. For any context block older than the current phase:
+1. Write a 3-5 sentence summary: what was decided, what was built, what failed
+2. Replace the transcript block with the summary
+3. Keep only the last 2-3 turns verbatim (for continuity)
+
+### Step 3 — Scope file content
+
+Never include entire files when you only need parts of them:
+- Include only the functions/methods being modified
+- Include only the test cases for the current behaviour
+- Include only the error output relevant to the current failure
+
+If a file must be included whole (e.g., a small config), it must be ≤ 50 lines or explicitly justified.
+
+### Step 4 — Audit includes
+
+For every block of context, ask: **if this were removed, would the model's output be worse?** If the answer is "maybe not," remove it.
+
+Keep:
+- The current task/goal (always)
+- The specific code being modified (always)
+- The error message or test failure driving the current step (always)
+- The contract/spec for the current slice (always)
+- Recent decisions that constrain the current step
+
+Remove:
+- Earlier phases' full output (summarize)
+- Files not touched in the current step
+- Passing test output (keep only failures)
+- Dependency documentation (link, don't include)
+- Comment threads and discussion (summarize conclusions)
+
+### Step 5 — Verify budget
+
+After pruning:
+- Context should fit in < 30% of the token budget for simple tasks, < 60% for complex ones
+- If still over budget after pruning, the task is too large for one call — split it
+
+## Context Composition Rules
+
+| Source | Include | Format |
+|--------|---------|--------|
+| Current task | Always | Full |
+| Current file being edited | Only changed functions | Snippet |
+| Current error / test failure | Always | Full |
+| Previous phase output | Summary only | 3-5 sentences |
+| Related file (not being edited) | Only the contract/signature | Snippet |
+| Conversation history | Last 2-3 turns + summary of rest | Mixed |
+| Documentation | Never inline | Reference by path |
+
+## Completion Criteria
+
+Context is lean when:
+- [ ] No completed phase artifacts in full (only summaries)
+- [ ] No entire files included when snippets suffice
+- [ ] Every included block answers "yes" to the audit question
+- [ ] Token budget is within target
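Step 5's budget targets are easy to express as a check. A sketch under assumed numbers (the token-counting inputs are hypothetical; the 30%/60% thresholds come from the skill):

```ts
// Hypothetical pre-call gate implementing Step 5's budget targets.
const SIMPLE_TASK_BUDGET = 0.3;  // < 30% of the window for simple tasks
const COMPLEX_TASK_BUDGET = 0.6; // < 60% for complex ones

function withinBudget(
  contextTokens: number,
  windowTokens: number,
  complex: boolean,
): boolean {
  const limit =
    (complex ? COMPLEX_TASK_BUDGET : SIMPLE_TASK_BUDGET) * windowTokens;
  return contextTokens < limit;
}

// If this returns false after pruning, the skill says to split the task,
// not to keep trimming.
```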
130  src/resources/workflow-skills/error-routing/SKILL.md  Normal file
@@ -0,0 +1,130 @@
+---
+name: error-routing
+description: Route errors by type, not severity. Use in any workflow with retry or error-handling steps. Maps error classes (transient, semantic, auth, infra, logic, contract) to their correct handlers. Prevents the two most common agent failure modes — retrying logic errors, and ignoring transient failures.
+user-invocable: false
+model-invocable: true
+side-effects: none
+permission-profile: normal
+triggers:
+- build
+- repair
+- "*"
+---
+
+# Error Routing
+
+## Iron Law
+
+```
+ROUTE BY CLASS FIRST, SEVERITY SECOND.
+NEVER RETRY A LOGIC ERROR.
+NEVER ABANDON A TRANSIENT ERROR WITHOUT RETRY.
+```
+
+Retrying a logic error wastes time and can cause data corruption. Abandoning a transient error causes false failures. Routing by severity ("it's a 500, must be important") misclassifies both.
+
+## Recognize Your Own Rationalizations
+
+- "It failed, so I'll try a different approach." → Different approach to what? Classify the error first. A different approach to a transient failure is wrong — you need the same approach with a wait.
+- "It's a 500 error — must be a server problem." → HTTP 500s include logic errors, auth errors, and transient failures. Read the body.
+- "Let me retry with exponential backoff." → Exponential backoff is for transient errors only. Applying it to logic errors just slows down the failure.
+- "The test is flaky — I'll just retry it." → Flaky tests are infrastructure errors or race conditions. Classify and fix, don't retry blindly.
+
+## Error Class Taxonomy
+
+### Transient
+
+**Definition:** Will resolve without code change, given time or retry.
+
+**Examples:** network timeout, rate limit (429), service temporarily unavailable (503), lock contention, resource temporarily exhausted.
+
+**Handler:** Retry with wait. Use Retry-After header if present; otherwise exponential backoff (1s, 2s, 4s, max 30s). Max 3 retries. If still failing after 3 retries, escalate to infra error.
+
+**Do NOT:** change code, change approach, or report as a bug.
+
+---
+
+### Auth / Credential
+
+**Definition:** Request rejected due to missing or invalid credentials.
+
+**Examples:** 401, 403, expired token, invalid API key, insufficient permissions.
+
+**Handler:** Do NOT retry. Surface immediately with the exact credential or permission required. Never attempt to infer or work around missing auth — escalate to the human.
+
+**Do NOT:** retry, change approach, or attempt alternative auth methods.
+
+---
+
+### Logic / Contract
+
+**Definition:** Code does the wrong thing. The error is in the logic, not the environment.
+
+**Examples:** wrong output, failing assertion, type error, invariant violation, business rule violation, test failure (not flaky).
+
+**Handler:** Debug, find root cause, fix. Follow `systematic-debugging` skill protocol. Do NOT retry or use a workaround.
+
+**Do NOT:** retry, add a workaround, suppress the error.
+
+---
+
+### Infra / Environment
+
+**Definition:** The execution environment is broken in a way that requires external action.
+
+**Examples:** disk full, out of memory, missing required tool, corrupt DB, missing env var that cannot be inferred.
+
+**Handler:** Surface immediately. Describe exactly what is missing and what the minimum fix is. Do NOT attempt to work around infra failures in code.
+
+**Do NOT:** retry, assume it will resolve, add fallback code.
+
+---
+
+### Semantic / Integration
+
+**Definition:** Two components disagree on a contract — schema mismatch, API version mismatch, unexpected data shape.
+
+**Examples:** JSON parse error on valid-looking response, unexpected null where required, field name changed in dependency.
+
+**Handler:** Investigate the contract. Identify which side is wrong (caller or callee). Fix the contract mismatch, not the symptom.
+
+**Do NOT:** add nil-guards without understanding why the nil is there.
+
+---
+
+### Scope / Ambiguity
+
+**Definition:** Cannot proceed because the task is not well-defined enough to make a correct decision.
+
+**Examples:** conflicting requirements, missing spec, ambiguous acceptance criteria.
+
+**Handler:** Surface the ambiguity with the specific decision that is blocked. Follow `assumption-log` protocol — document the assumption, pick a sensible default, mark for review.
+
+**Do NOT:** guess silently.
+
+## Routing Decision Tree
+
+```
+Error occurs
+│
+├─ Is it a network/rate-limit/timeout? → TRANSIENT → retry with wait
+│
+├─ Is it auth/403/401/credential? → AUTH → surface, do not retry
+│
+├─ Is it a test failure or wrong output? → LOGIC → debug + fix
+│
+├─ Is the environment broken? → INFRA → surface, external action needed
+│
+├─ Is it a contract/schema mismatch? → SEMANTIC → investigate contract
+│
+└─ Is the task underspecified? → SCOPE → assumption-log protocol
+```
+
+## Completion Criteria
+
+For each error encountered in the workflow:
+- [ ] Error classified by type (not severity)
+- [ ] Handler applied per classification
+- [ ] Resolution recorded (what the error was, what fixed it)
+- [ ] No logic errors suppressed or worked around
+- [ ] No transient errors abandoned without retry
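The routing decision tree is essentially a classifier. A minimal sketch (the matching heuristics below are illustrative; real classification would inspect structured error objects, not message strings):

```ts
type ErrorClass =
  | "transient"
  | "auth"
  | "logic"
  | "infra"
  | "semantic"
  | "scope";

// Hypothetical classifier following the routing decision tree above.
function classifyError(message: string, httpStatus?: number): ErrorClass {
  if (httpStatus === 429 || httpStatus === 503 || /timeout/i.test(message)) {
    return "transient"; // retry with wait, max 3 attempts
  }
  if (httpStatus === 401 || httpStatus === 403) {
    return "auth"; // surface immediately, never retry
  }
  if (/ENOSPC|out of memory|command not found/i.test(message)) {
    return "infra"; // external action needed
  }
  if (/parse error|schema|unexpected null/i.test(message)) {
    return "semantic"; // investigate the contract, not the symptom
  }
  if (/underspecified|ambiguous|conflicting requirements/i.test(message)) {
    return "scope"; // assumption-log protocol
  }
  return "logic"; // default: debug and fix, never retry
}
```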
132  src/resources/workflow-skills/handoff-readability/SKILL.md  Normal file
@@ -0,0 +1,132 @@
+---
+name: handoff-readability
+description: Enforce boring code, why-comments on non-obvious decisions, and clean interface contracts. Use in code-generation workflows. Makes rewrites cheap, reduces onboarding time, and prevents the "only the original author understands this" failure mode.
+user-invocable: false
+model-invocable: true
+side-effects: none
+permission-profile: normal
+triggers:
+- build
+- review
+- "*"
+---
+
+# Handoff Readability
+
+## Iron Law
+
+```
+CODE IS READ 10X MORE THAN IT IS WRITTEN.
+WRITE FOR THE READER WHO HAS ZERO CONTEXT.
+BORING CODE IS A FEATURE.
+```
+
+Clever code that only the author can read is a liability. Every non-obvious decision is a future debugging session waiting to happen. Every missing comment on a "why" is a future misunderstanding that will produce a silent regression.
+
+## Recognize Your Own Rationalizations
+
+- "It's obvious what this does." → Obvious to you, now, with context. Not obvious at 2am during an incident to someone who didn't write it.
+- "Comments are noise." → Implementation comments are often noise. *Why* comments are always signal.
+- "The code is self-documenting." → Function names document *what*. Only comments document *why*.
+- "I'll clean it up later." → Later is when you're two milestones ahead and the context is gone. Clean it now.
+
+## When to Run
+
+- During code generation (inline, as you write)
+- During code review (check existing code for violations)
+- Before marking a slice complete (final readability pass)
+
+## The Three Rules
+
+### Rule 1: Boring over clever
+
+Prefer the solution a junior developer can read and modify. If you face a choice between:
+- An elegant one-liner and a readable 5-liner → use the 5-liner
+- A clever abstraction and a repeated-but-obvious pattern → repeat it until repetition is clearly worth abstracting
+- A performance micro-optimization and readable code → readable code, unless the performance requirement is proven
+
+**Exception:** performance-critical paths (must be documented with a benchmark that proves the optimization is necessary).
+
+### Rule 2: Why-comments on every non-obvious decision
+
+A comment is required when:
+- The code does something that looks wrong but is intentional
+- The code uses a non-standard approach for a reason
+- A value or constant was chosen for a specific reason (not arbitrary)
+- The code handles an edge case that isn't obvious from the types
+
+Format:
+```ts
+// WHY: <reason the non-obvious thing is correct>
+```
+
+Examples:
+```ts
+// WHY: SQLite WAL mode is required here — the default journal mode causes
+// write contention when multiple processes access the same DB file.
+db.pragma("journal_mode = WAL");
+
+// WHY: Retry up to 3 times with 1s backoff. The gateway has a 500ms cold-start
+// window after idle; the first call will often fail.
+const result = await retry(call, { times: 3, waitMs: 1000 });
+
+// WHY: Empty array not null — callers use .length checks without null guards.
+if (!data) return [];
+```
+
+### Rule 3: Clean interface contracts
+
+Every exported function needs a contract that answers:
+- **What does it return** (type + what null/undefined/empty means)
+- **What are the preconditions** (what must be true for it to work)
+- **What are the side effects** (writes, events, mutations)
+
+Bad:
+```ts
+export function processUser(user) { ... }
+```
+
+Good:
+```ts
+/**
+ * Validate and normalize a user record for DB insertion.
+ * Returns null if the record fails validation (caller decides whether to throw).
+ * Side effects: none. Pure function.
+ * Precondition: user.id must be a non-empty string.
+ */
+export function processUser(user: RawUser): NormalizedUser | null { ... }
+```
+
+## Rewrites-Cheap Test
+
+Before submitting a slice, ask:
+
+1. **Could a new team member understand each function without reading its callers?**
+   If no → add why-comments or simplify.
+
+2. **Could the core logic be replaced without touching the interface?**
+   If no → the interface is coupled to the implementation. Separate them.
+
+3. **Are there any "magic" values without a named constant and a why-comment?**
+   If yes → name the constant and explain the value.
+
+4. **Does every exported symbol have a contract (JSDoc with purpose + consumer)?**
+   If no → add it before marking the slice done.
+
+## Anti-Patterns
+
+| Pattern | Problem | Fix |
+|---------|---------|-----|
+| `// do the thing` | Describes what, not why | Replace with a why-comment or delete |
+| `const x = 42` | Magic number | `const MAX_RETRIES = 3; // WHY: ...` |
+| One-letter variables outside loops | Forces reader to track mental state | Use descriptive names |
+| Deeply nested conditionals | Hard to follow control flow | Extract to named functions |
+| Side effects in getters | Violates principle of least surprise | Separate reads from writes |
+
+## Completion Criteria
+
+- [ ] No magic values without named constants and why-comments
+- [ ] Every non-obvious decision has a `// WHY:` comment
+- [ ] Every exported symbol has a purpose + consumer JSDoc
+- [ ] Core logic is replaceable without changing the interface
+- [ ] A new team member can understand each function without external context
96  src/resources/workflow-skills/irreversible-ops/SKILL.md  Normal file
@@ -0,0 +1,96 @@
+---
+name: irreversible-ops
+description: Human-review gate for irreversible operations — deploys, database migrations, published artifact pushes, force pushes, and destructive deletes. Use in any workflow that touches infra, DB schema, or published artifacts. Classifies reversibility, injects a mandatory verification step, and blocks autonomous progression past the gate.
+user-invocable: false
+model-invocable: true
+side-effects: none
+permission-profile: trusted
+triggers:
+- build
+- repair
+- "*"
+---
+
+# Irreversible Ops
+
+## Iron Law
+
+```
+BEFORE AN IRREVERSIBLE OP: STOP, CLASSIFY, GATE.
+NO AUTONOMOUS AGENT CROSSES AN IRREVERSIBLE BOUNDARY WITHOUT AN EXPLICIT HUMAN GATE.
+```
+
+An operation is irreversible if rolling it back requires more than running one command. If you are not certain, treat it as irreversible.
+
+## Recognize Your Own Rationalizations
+
+- "It's a dev environment — I can always recreate it." → Development data and schemas that are not in source control are irreversible. Assume production semantics until proven otherwise.
+- "The migration is small and I've done this before." → Size and familiarity do not reduce irreversibility. The gate is about the op class, not the op size.
+- "Autonomous mode is enabled, so I can proceed." → Autonomous mode governs pace and interaction style. It does not remove irreversibility gates.
+- "I'll add a rollback plan after." → Rollback plan comes first, before the gate can be passed.
+
+## Irreversible Op Classification
+
+### Class A — Always requires human gate
+
+| Operation | Why irreversible |
+|-----------|-----------------|
+| Database migration (schema change) | Column drops, type changes, constraint adds — data loss risk |
+| Published package version bump | npm/PyPI/GitHub Releases — cannot be un-published cleanly |
+| Force push to protected branch | Rewrites shared history |
+| Production deploy | Live traffic impact; rollback window may close |
+| Secret/credential rotation | Old credentials may already be in use |
+| Mass delete (files, records, buckets) | Data loss if incorrect |
+| External service configuration change | May affect other consumers |
+
+### Class B — Requires gate in autonomous mode, can proceed in assisted/manual
+
+| Operation | Condition |
+|-----------|-----------|
+| Database migration (data backfill) | If revert is a compensating migration |
+| Git tag creation | If CI/CD triggers on tags |
+| API endpoint removal | If consumers may exist |
+| Config change affecting behaviour | If not behind a feature flag |
+
+### Class C — No gate required
+
+- Adding new columns (no existing data affected)
+- Creating new tables
+- Adding new endpoints
+- Adding new feature flags (not yet enabled)
+- Writing tests
+- Modifying local dev config
+
+## Gate Protocol
+
+Before any Class A or Class B op, produce in writing:
+
+```
+Op class: <A | B>
+Operation: <exact description of what will happen>
+Affected scope: <which data, which services, which users>
+Reversibility: <how to undo this if it goes wrong — be specific>
+Rollback plan: <exact command(s) to roll back>
+Verification: <how will you know it succeeded?>
+Gate: BLOCKED — requires human confirmation before proceeding
+```
+
+Do NOT proceed until the human confirms. "Confirmed" means explicit approval of the exact operation described above, not a general "go ahead."
+
+## Post-Gate Checklist
+
+After the human gate passes:
+- [ ] Backup taken (or confirmed unnecessary with reason)
+- [ ] Rollback plan is still valid
+- [ ] Monitoring/alerting is in place
+- [ ] Operation executed exactly as described in the gate record
+- [ ] Verification result recorded
+
+If the actual operation deviates from the gate description, stop and re-gate.
+
+## Completion Criteria
+
+- [ ] Every irreversible op in the workflow has been classified
+- [ ] All Class A ops have a gate record + human confirmation
+- [ ] All Class B ops in autonomous mode have a gate record + human confirmation
+- [ ] Post-gate checklist complete for each executed op
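The gate protocol's written record also has an obvious typed shape. A sketch (field names mirror the template above; the type itself is not part of the skill):

```ts
// Hypothetical typing of the gate record template above.
interface GateRecord {
  opClass: "A" | "B";
  operation: string;       // exact description of what will happen
  affectedScope: string;   // which data, which services, which users
  reversibility: string;   // how to undo this, specifically
  rollbackPlan: string;    // exact command(s) to roll back
  verification: string;    // how success will be confirmed
  humanConfirmed: boolean; // explicit approval of this exact record
}

// The skill's core invariant: no Class A/B op proceeds unconfirmed.
function mayProceed(gate: GateRecord): boolean {
  return gate.humanConfirmed;
}
```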
119  src/resources/workflow-skills/observe-first/SKILL.md  Normal file
|
|
@ -0,0 +1,119 @@
|
|||
---
|
||||
name: observe-first
|
||||
description: Enforce read-map-understand before any edit. Use at the start of any workflow that modifies existing code in an unfamiliar or partially-familiar codebase. Prevents the "Junior Refactor" failure mode — making changes without knowing what the code does or how it's used. Side-chain skill that gates the modify phase.
|
||||
user-invocable: false
|
||||
model-invocable: true
|
||||
side-effects: none
|
||||
permission-profile: normal
|
||||
triggers:
|
||||
- build
|
||||
- repair
|
||||
- review
|
||||
- "*"
|
||||
---

# Observe First

## Iron Law

```
NO EDIT WITHOUT A MENTAL MODEL.
NO MENTAL MODEL WITHOUT EVIDENCE.
```

If you have not completed Phase 1 (Structure) and Phase 2 (Usage), you are not permitted to modify any file. The modification phase is blocked until both phases produce written findings.

## Recognize Your Own Rationalizations

These are the exact shortcuts you will reach for. Each is wrong:

- "I can see what it does from the name." → Names lie. Read the body.
- "I only need to change one line." → You don't know which one yet without reading the callers.
- "I've seen this pattern before." → Familiarity is not analysis. This codebase may use the pattern differently.
- "I'll figure it out as I go." → Going is the wrong order. Understand first, then go.
- "The tests will catch mistakes." → Tests catch regressions you knew about. They don't catch structural misunderstandings.

## When to Run

- Any workflow that modifies existing code you haven't read end-to-end in this session.
- Planning phases that require accurate impact analysis before choosing an approach.
- Whenever the scope of a change is unclear.

Do NOT skip this skill for "small" changes — small changes with wrong mental models cause the most silent bugs.

## Skill Chain

Side-chain gate. Blocks the modify phase until both observe phases complete.

```
← prev: plan, spec-first-tdd, or any workflow beginning a modify phase
→ next: return to the invoking workflow once Phase 1 + Phase 2 are in writing
```

## Phase 1 — Structure Map

Map the file/module being modified before touching it.

```bash
# Who owns the symbol?
rg -n "export.*<symbol>|function <symbol>|class <symbol>" src/ packages/

# What does the file do?
head -80 <file>          # module header, imports, exports
rg -n "export " <file>   # public surface

# What are its dependencies?
rg -n "^import " <file>        # what it imports
rg -n "from.*<module>" src/    # who imports this module (rg recurses by default)
```

Produce written output:
1. **Module purpose** — one sentence: why does this module exist?
2. **Exports** — list every exported symbol and its type
3. **Callers** — list every file that imports this module
4. **Dependencies** — list what this module imports from elsewhere

Do NOT proceed to Phase 2 until this list exists in writing.
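
As a minimal sketch of what those findings might look like (the module and file names are hypothetical):

```
Module purpose: session-store.ts persists login sessions and exposes lookup by token.
Exports: createSession (function), getSession (function), SessionRecord (type)
Callers: src/api/login.ts, src/api/logout.ts, src/middleware/auth.ts
Dependencies: ./db (query helpers), crypto (token generation)
```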

## Phase 2 — Usage Analysis

For each symbol you intend to modify, trace how it is called.

```bash
# All call sites
rg -n "<symbol>" src/ packages/ --type ts --type js

# Test coverage
rg -n "<symbol>" src/ -g "*.test.*"

# Recent history
git log --oneline -10 -- <file>
git log --oneline -10 -S "<symbol>"   # commits that changed this symbol
```

Produce written output for each symbol:
1. **Call sites** — file:line for every caller, with the argument values passed
2. **Contract** — what callers expect in return (inferred from usage)
3. **Invariants** — what must be true before/after this symbol runs
4. **Change blast radius** — which callers break if you change the signature or behaviour

Do NOT write any code until this list exists in writing.
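
A sketch of the per-symbol record, again with hypothetical names:

```
Symbol: getSession
Call sites: src/middleware/auth.ts:42 (token from header), src/api/logout.ts:17 (token from body)
Contract: returns SessionRecord or null; callers treat null as "not logged in"
Invariants: never throws on a malformed token; expired sessions return null, not an error
Change blast radius: auth middleware breaks if the null case becomes a throw; logout is tolerant
```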

## Phase 3 — Modification (Unblocked)

Only after Phases 1 and 2 are documented:

1. Make the **smallest** change that satisfies the contract.
2. Keep changes inside the blast radius you mapped — no scope creep.
3. If the blast radius is larger than expected, surface it before continuing.
4. Update callers in the order dictated by the dependency map, not alphabetically.

## Completion Criteria

You may exit this skill and return to the invoking workflow when:

- [ ] Phase 1 findings written (module purpose, exports, callers, deps)
- [ ] Phase 2 findings written (call sites, contract, invariants, blast radius) for every symbol to be modified
- [ ] The modification is bounded to the mapped blast radius

If Phase 1 or Phase 2 reveals that the change is larger than originally scoped, **stop and surface the new scope** before modifying anything.
134
src/resources/workflow-skills/state-discipline/SKILL.md
Normal file
@ -0,0 +1,134 @@
---
name: state-discipline
description: Enforce structured, deterministic state management in long-running workflows. Use in any multi-step workflow that persists state across iterations. Prevents LLM-managed state, in-memory-only state, and unstructured file-based state — the three failure modes that cause autonomous loops to lose track of where they are.
user-invocable: false
model-invocable: true
side-effects: none
permission-profile: normal
triggers:
  - build
  - plan
  - "*"
---

# State Discipline

## Iron Law

```
STATE LIVES IN SQLITE OR ON DISK AS STRUCTURED FILES.
NEVER IN THE LLM'S CONTEXT WINDOW.
NEVER IN MEMORY ACROSS STEPS.
```

Context-window state is lost on restart, summarization, and context compaction. In-memory state is lost on crash. Only SQLite and structured files survive restarts, crashes, and context rotation.

## Recognize Your Own Rationalizations

- "I'll track the progress in my context." → Context is summarized and truncated. Progress state in context is lost exactly when you need it most — after a crash or a long run.
- "I'll use a JSON object in a variable." → In-memory variables don't survive the tool call boundary. Each tool invocation is a fresh execution context.
- "It's simpler to just write to a text file." → Unstructured text files can't be queried, can't be joined, and produce parse errors under concurrent access. Use SQLite.
- "I'll write the state management after the feature works." → State management is not a feature — it is the foundation. Without it, you can't resume, can't retry, and can't verify.

## When to Run

- Before designing any multi-step workflow that must survive restart
- When a workflow has been running for more than 2 iterations
- When implementing retry logic that requires tracking attempts
- When implementing any lock, queue, or work-item pattern

## The Four State Rules

### Rule 1: SQLite for structured state

Use `.sf/sf.db` (or a task-scoped DB) for any state with schema, ordering, priority, joins, or queries.

**Use SQLite when:**
- Tracking work items (pending/in-progress/done)
- Recording retry counts
- Storing key-value configuration that persists across steps
- Any state that needs to be queried or filtered

**Use structured files when:**
- The state is a single document (a plan, a spec, an evidence file)
- The state is append-only and never queried (logs)
- The state must be human-readable and is the primary artifact

**Never use:**
- In-memory variables for state that crosses step boundaries
- Free-form text files for state that needs to be queried
- LLM context window for state that must survive restart
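
A minimal sketch of the SQLite option using the sqlite3 CLI (the DB path follows this repo's `.sf/sf.db` convention; the `kv` table name and keys are illustrative):

```bash
# Define a key-value table once, then read and write config across steps
sqlite3 .sf/sf.db "CREATE TABLE IF NOT EXISTS kv (key TEXT PRIMARY KEY, value TEXT NOT NULL);"

# Upsert a value (survives crashes and context rotation)
sqlite3 .sf/sf.db "INSERT INTO kv (key, value) VALUES ('current_phase', 'observe') ON CONFLICT(key) DO UPDATE SET value = excluded.value;"

# Read it back in a later step
sqlite3 .sf/sf.db "SELECT value FROM kv WHERE key = 'current_phase';"
```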

### Rule 2: Schema before data

Define the schema explicitly before inserting any rows. The schema is the contract:

```sql
CREATE TABLE IF NOT EXISTS workflow_units (
  id TEXT PRIMARY KEY,
  status TEXT NOT NULL DEFAULT 'pending' -- pending | in_progress | done | blocked
    CHECK(status IN ('pending','in_progress','done','blocked')),
  created_at TEXT NOT NULL DEFAULT (datetime('now')),
  updated_at TEXT NOT NULL DEFAULT (datetime('now')),
  error TEXT -- last error if status = blocked
);
```

Never add rows to an undefined table. Never use a table whose schema you haven't verified.
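
Verifying a schema takes one query; both forms below are standard SQLite introspection:

```sql
-- List the columns the table actually has
PRAGMA table_info(workflow_units);

-- Or compare the stored DDL against what you expect
SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'workflow_units';
```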

### Rule 3: Atomic transitions

State transitions must be atomic. Use SQLite transactions for multi-step transitions:

```sql
BEGIN;
UPDATE workflow_units SET status = 'in_progress', updated_at = datetime('now')
  WHERE id = :id AND status = 'pending'; -- conditional: only if still pending
-- do the work
UPDATE workflow_units SET status = 'done', updated_at = datetime('now')
  WHERE id = :id;
COMMIT;
```

Never set status to 'in_progress' in one statement and 'done' in another without a transaction — a crash between the two leaves inconsistent state.
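
When multiple workers share the queue, the conditional `status = 'pending'` clause above is also the claim mechanism. A sketch of checking whether the claim actually won, using SQLite's built-in `changes()` function:

```sql
UPDATE workflow_units SET status = 'in_progress', updated_at = datetime('now')
  WHERE id = :id AND status = 'pending';
-- changes() returns 0 if another worker claimed the unit first; skip it in that case
SELECT changes();
```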

### Rule 4: Resume from state, not from memory

Every workflow step must be resumable from the DB alone:

```sql
-- Find the next pending unit (resumable from cold start)
SELECT * FROM workflow_units
WHERE status = 'pending'
  AND NOT EXISTS (
    SELECT 1 FROM workflow_units dep
    JOIN unit_deps d ON d.depends_on = dep.id
    WHERE d.unit_id = workflow_units.id AND dep.status != 'done'
  )
ORDER BY priority DESC, created_at ASC
LIMIT 1;
```

If you cannot reconstruct "where the workflow is" from a single SQL query, the state model is wrong.
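
Note that this query assumes a `priority` column and a `unit_deps` dependency table that the Rule 2 schema does not define. A minimal sketch of the companion DDL it implies:

```sql
ALTER TABLE workflow_units ADD COLUMN priority INTEGER NOT NULL DEFAULT 0;

CREATE TABLE IF NOT EXISTS unit_deps (
  unit_id    TEXT NOT NULL REFERENCES workflow_units(id),
  depends_on TEXT NOT NULL REFERENCES workflow_units(id),
  PRIMARY KEY (unit_id, depends_on)
);
```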

## State Inventory Checklist

Before implementing a multi-step workflow, produce this inventory:

```
State item: <what needs to be remembered>
Lifetime: <step | iteration | session | permanent>
Schema: <table + columns, or file path + format>
Read pattern: <how it is queried>
Write pattern: <when and how it is updated>
Conflict rule: <what happens if two processes write simultaneously>
Recovery: <how to detect and fix corrupt state>
```
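
One filled-in entry for a hypothetical retry counter, to show the expected level of detail:

```
State item: retry count per workflow unit
Lifetime: permanent
Schema: workflow_units.retries INTEGER NOT NULL DEFAULT 0
Read pattern: SELECT retries FROM workflow_units WHERE id = :id
Write pattern: incremented in the same transaction that records the failure
Conflict rule: increment in SQL (SET retries = retries + 1), never read-modify-write in the client
Recovery: status = 'in_progress' with a stale updated_at means a crashed worker; reset to 'blocked'
```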

## Completion Criteria

- [ ] All cross-step state is in SQLite or structured files
- [ ] Schema is defined before any data is written
- [ ] All state transitions are atomic (transactions for multi-step)
- [ ] The workflow is resumable from the DB alone after a cold restart
- [ ] No state stored only in context or in-memory variables
91
src/resources/workflow-skills/vertical-slice/SKILL.md
Normal file
@ -0,0 +1,91 @@
---
name: vertical-slice
description: Enforce end-to-end working increments at each workflow step. Use during planning and decomposition phases. Prevents "horizontal layers" — building all models, then all services, then all tests — which produces nothing shippable until the very end. Every slice must be testable and deployable in isolation.
user-invocable: false
model-invocable: true
side-effects: none
permission-profile: normal
triggers:
  - plan
  - build
  - "*"
---

# Vertical Slice

## Iron Law

```
EVERY SLICE MUST BE INDEPENDENTLY TESTABLE AND DEPLOYABLE.
NO SLICE IS DONE UNTIL ITS CONSUMER PATH WORKS END-TO-END.
```

A slice that produces "partial infrastructure" is not a slice — it is a layer. Layers are not shippable. If the slice cannot be verified in isolation, it is too large or wrongly cut.

## Recognize Your Own Rationalizations

- "I'll wire it up in the next slice." → If it can't be verified now, you can't confirm the first slice worked. Bugs compound invisibly.
- "It's more efficient to build all the DB tables first." → That "efficiency" means shipping nothing until the very end. Horizontal layers guarantee integration surprises.
- "The consumer isn't built yet." → Then build a stub consumer in this slice. The slice defines its own consumer path.
- "I'll test it all together when it's complete." → "Together" is where integration bugs hide. Test each slice independently.

## When to Run

- Planning or decomposition: before breaking a milestone into tasks.
- Slice review: before starting a new slice, confirm the previous one is truly end-to-end.
- When an autonomous loop has been running for more than two slices without a shippable increment.

## Skill Chain

Planning-phase skill. Inline with the main delivery chain.

```
← prev: architecture-planning, pm-planning, or any planning phase
→ next: spec-first-tdd (write the failing test for the first slice)
```

## Slice Definition Protocol

For each slice, define **before writing any code**:

```
Slice ID: <S01, S02, ...>
Purpose: <one sentence — why does this slice exist? what value does it add?>
Entry point: <the user-visible or API-visible surface that exercises this slice>
Done state: <exact observable behaviour that proves this slice is complete>
Verifier: <the command or test that confirms done state — must be runnable>
Stub strategy: <if a dependency isn't built yet, what stub/fake makes this testable?>
```

A slice without a `Verifier` is not a valid slice. Stop and define one before proceeding.
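
A filled-in record for a hypothetical first slice of a login feature (the endpoint, test command, and stub are illustrative):

```
Slice ID: S01
Purpose: Users can log in with email + password and receive a session token.
Entry point: POST /api/login
Done state: valid credentials return 200 with a token; invalid credentials return 401
Verifier: npm test -- --grep "POST /api/login"
Stub strategy: user store not built yet; back the handler with an in-memory fake seeded with one user
```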

## Anti-Patterns to Detect and Reject

| Pattern | Problem | Correct Cut |
|---------|---------|-------------|
| "Add all DB tables" | No consumer, not testable alone | "Add one table + one read + one test" |
| "Build the service layer" | No entry point, no verifier | "Add one endpoint that returns real data from DB" |
| "Implement the model" | A model without integration is not a slice | "Add model + minimal handler + test that calls handler" |
| "Set up infrastructure" | Infrastructure without behaviour is scaffolding | Include the first real use in the same slice |
| "Refactor X" | Refactors with no consumer test are invisible | Include the test that proves behaviour unchanged |

## Slice Sizing

**Right-sized slice:** completes in a single autonomous iteration, has one clear verifier, can be described in one sentence.

**Too large:** "Build the authentication system." Cut it: login endpoint → token validation → logout → password reset.

**Too small:** "Add an import statement." Merge it with the first meaningful use.

**Boundary check:** If a slice takes more than one session to complete, it is too large. Cut it.

## Completion Criteria

Each slice is done when:

- [ ] `Verifier` command runs and passes
- [ ] The consumer path works end-to-end (not "the model is ready")
- [ ] No "temporary stubs" left in production paths (test stubs are fine)
- [ ] The done state matches what was defined before coding started

If the verifier passes but the done state wasn't defined upfront, you completed something — you just don't know what. Define done state first next time.