fix: stabilize sf auto and subagent routing

This commit is contained in:
Mikael Hugo 2026-04-30 21:55:17 +02:00
parent da324da27e
commit 78be73fcb8
77 changed files with 5395 additions and 11 deletions

151
.github/workflows/dev-publish.yml vendored Normal file
View file

@ -0,0 +1,151 @@
# singularity-forge + CI: manual @dev channel publish with approval gate
name: Dev Publish
# Manual pre-release. Click "Run workflow" in the Actions tab to stamp a
# version and publish @dev to npm. Gated by the `dev` GitHub Environment
# (configure reviewers in repo Settings -> Environments).
on:
workflow_dispatch:
inputs:
ref:
description: 'Branch or SHA to publish as @dev'
required: false
default: 'main'
concurrency:
group: dev-publish-${{ github.event.inputs.ref }}
cancel-in-progress: false
permissions:
contents: read
packages: write
jobs:
dev-publish:
name: Dev Publish
runs-on: ubuntu-latest
environment: dev
outputs:
dev-version: ${{ steps.stamp.outputs.version }}
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
token: ${{ secrets.RELEASE_PAT }}
fetch-depth: 0
- name: Mark workspace safe for git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Install web host dependencies
run: npm --prefix web ci
- name: Cache Next.js build
uses: actions/cache@v4
with:
path: web/.next/cache
key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }}
restore-keys: |
nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-
nextjs-${{ runner.os }}-
- name: Build core
run: npm run build:core
- name: Build web host
run: npm run build:web-host
- name: Stamp dev version and sync platform packages
id: stamp
env:
VERSION_CHANNEL: dev
run: |
npm run pipeline:version-stamp
npm run sync-platform-versions
echo "version=$(node -e 'process.stdout.write(require("./package.json").version)')" >> "$GITHUB_OUTPUT"
- name: Smoke test
run: |
chmod +x dist/loader.js
export SF_SMOKE_BINARY="$(pwd)/dist/loader.js"
npm run test:smoke
- name: Publish @dev
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
VERSION=$(node -e 'process.stdout.write(require("./package.json").version)')
if npm view "singularity-forge@${VERSION}" version 2>/dev/null; then
echo "Version ${VERSION} already published — moving @dev tag"
npm dist-tag add "singularity-forge@${VERSION}" dev
else
npm publish --tag dev
fi
echo "Verifying singularity-forge@${VERSION} is reachable on npm..."
for i in 1 2 3 4 5; do
npm view "singularity-forge@${VERSION}" version 2>/dev/null && echo "Confirmed: singularity-forge@${VERSION} is live." && exit 0
echo "Attempt $i: not yet visible — waiting 10s..."
sleep 10
done
echo "::error::Publish step succeeded but singularity-forge@${VERSION} is not reachable on npm after 50s. Check NPM_TOKEN permissions and registry config."
exit 1
dev-verify:
name: Dev Verify (installed package)
needs: dev-publish
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install published singularity-forge@dev globally (with registry propagation retry)
env:
DEV_VERSION: ${{ needs.dev-publish.outputs.dev-version }}
run: |
for i in 1 2 3 4 5 6; do
npm install -g "singularity-forge@${DEV_VERSION}" && exit 0
echo "Attempt $i failed — waiting 10s for npm registry propagation..."
sleep 10
done
echo "::error::Failed to install singularity-forge@${DEV_VERSION} after 6 attempts."
echo "::error::Recommended actions: (1) investigate the failing step above, (2) if the version exists on npm, deprecate it with 'npm deprecate singularity-forge@${DEV_VERSION} \"broken build; see Actions run\"', (3) cut a fix and re-run Dev Publish."
exit 1
- name: Run smoke tests (against installed binary)
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:smoke
- name: Install repo dependencies (for regression harness)
run: npm ci
- name: Run live regression tests (against installed binary)
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:live-regression
- name: Warn on verify failure
if: failure()
env:
DEV_VERSION: ${{ needs.dev-publish.outputs.dev-version }}
run: |
echo "::error::Post-publish verification failed for singularity-forge@${DEV_VERSION}."
echo "::error::Recommended actions: (1) investigate the failing step above, (2) if the version exists on npm, deprecate it with 'npm deprecate singularity-forge@${DEV_VERSION} \"broken build; see Actions run\"', (3) cut a fix and re-run Dev Publish."
exit 1

86
.github/workflows/forensics-check.yml vendored Normal file
View file

@ -0,0 +1,86 @@
name: Forensics Check
on:
issues:
types: [opened, edited]
permissions:
issues: write
jobs:
check-forensics:
# Only run on bug reports
if: contains(github.event.issue.labels.*.name, 'bug')
runs-on: blacksmith-4vcpu-ubuntu-2404
steps:
- name: Check for forensics output and comment if missing
uses: actions/github-script@v7
with:
script: |
const body = context.payload.issue.body || '';
const issueNumber = context.payload.issue.number;
const forensicsMarker = 'Auto-generated by `/sf forensics`';
if (body.includes(forensicsMarker)) {
core.info('Forensics output found in issue body — no comment needed.');
return;
}
// Check comments too — reporter may have added it after opening
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
});
const forensicsInComments = comments.data.some(c =>
c.body && c.body.includes(forensicsMarker)
);
if (forensicsInComments) {
core.info('Forensics output found in comments — no comment needed.');
return;
}
// Avoid duplicate bot comments
const botMarker = '<!-- sf-forensics-check -->';
const alreadyCommented = comments.data.some(c =>
c.user.type === 'Bot' && c.body && c.body.includes(botMarker)
);
if (alreadyCommented) {
core.info('Forensics request comment already posted — skipping duplicate.');
return;
}
const comment = [
botMarker,
'',
'Thanks for the bug report! To help us investigate, please run `/sf forensics` in your project and paste the output here.',
'',
'```bash',
'# In your project directory:',
'/sf forensics',
'```',
'',
'The forensics output includes git history analysis, session traces, stuck-loop detection, and cost data that significantly speeds up diagnosis.',
'',
'---',
'*This is an automated check. If `/sf forensics` is not available in your version, you can skip this step.*',
].join('\n');
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
body: comment,
});
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['needs-forensics'],
});
core.info('Posted forensics request comment.');

143
.github/workflows/next-publish.yml vendored Normal file
View file

@ -0,0 +1,143 @@
name: Next Publish
# Manual pre-release. Click "Run workflow" in the Actions tab to stamp a
# version and publish @next to npm. Optional approval gate via the `next`
# GitHub Environment (configure reviewers in repo Settings -> Environments).
on:
workflow_dispatch:
inputs:
ref:
description: 'Branch or SHA to publish as @next'
required: false
default: 'next'
concurrency:
group: next-publish-${{ github.event.inputs.ref }}
cancel-in-progress: false
permissions:
contents: read
packages: write
jobs:
next-publish:
name: Next Publish
runs-on: ubuntu-latest
environment: next
outputs:
next-version: ${{ steps.stamp.outputs.version }}
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
token: ${{ secrets.RELEASE_PAT }}
fetch-depth: 0
- name: Mark workspace safe for git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Install web host dependencies
run: npm --prefix web ci
- name: Cache Next.js build
uses: actions/cache@v4
with:
path: web/.next/cache
key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }}
restore-keys: |
nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-
nextjs-${{ runner.os }}-
- name: Build core
run: npm run build:core
- name: Build web host
run: npm run build:web-host
- name: Stamp next version and sync platform packages
id: stamp
env:
VERSION_CHANNEL: next
run: |
npm run pipeline:version-stamp
npm run sync-platform-versions
echo "version=$(node -e 'process.stdout.write(require("./package.json").version)')" >> "$GITHUB_OUTPUT"
- name: Smoke test
run: |
chmod +x dist/loader.js
export SF_SMOKE_BINARY="$(pwd)/dist/loader.js"
npm run test:smoke
- name: Publish @next
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
VERSION=$(node -e 'process.stdout.write(require("./package.json").version)')
if npm view "singularity-forge@${VERSION}" version 2>/dev/null; then
echo "Version ${VERSION} already published — moving @next tag"
npm dist-tag add "singularity-forge@${VERSION}" next
else
npm publish --tag next
fi
next-verify:
name: Next Verify (installed package)
needs: next-publish
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install published singularity-forge@next globally (with registry propagation retry)
env:
NEXT_VERSION: ${{ needs.next-publish.outputs.next-version }}
run: |
for i in 1 2 3 4 5 6; do
npm install -g "singularity-forge@${NEXT_VERSION}" && exit 0
echo "Attempt $i failed — waiting 10s for npm registry propagation..."
sleep 10
done
echo "::error::Failed to install singularity-forge@${NEXT_VERSION} after 6 attempts. The @next tag may point at a broken artifact — deprecate it with: npm deprecate singularity-forge@${NEXT_VERSION} 'broken build'"
exit 1
- name: Run smoke tests (against installed binary)
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:smoke
- name: Install repo dependencies (for regression harness)
run: npm ci
- name: Run live regression tests (against installed binary)
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:live-regression
- name: Warn on verify failure
if: failure()
env:
NEXT_VERSION: ${{ needs.next-publish.outputs.next-version }}
run: |
echo "::error::Post-publish verification failed for singularity-forge@${NEXT_VERSION}. The @next tag still points at this version on npm."
echo "::error::Recommended actions: (1) investigate the failing step above, (2) deprecate the broken version with 'npm deprecate singularity-forge@${NEXT_VERSION} \"broken build; see Actions run\"', (3) cut a fix and re-run Next Publish."
exit 1

177
.github/workflows/prod-release.yml vendored Normal file
View file

@ -0,0 +1,177 @@
name: Prod Release
# Manual prod release. Click "Run workflow" in the Actions tab to cut @latest
# from main. Gated by the `prod` GitHub Environment approval before any
# publishing or commit-push side effects run.
on:
workflow_dispatch: {}
concurrency:
group: prod-release
cancel-in-progress: false
permissions:
contents: write
packages: write
pull-requests: write
jobs:
prod-release:
name: Production Release
runs-on: ubuntu-latest
environment: prod
steps:
- uses: actions/checkout@v6
with:
ref: main
fetch-depth: 0
token: ${{ secrets.RELEASE_PAT }}
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Cache Next.js build
uses: actions/cache@v4
with:
path: web/.next/cache
key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }}
restore-keys: |
nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-
nextjs-${{ runner.os }}-
- name: Run live LLM tests (optional)
continue-on-error: true
run: npm run test:live || echo "::warning::Live LLM tests failed — non-blocking, but worth investigating"
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SF_LIVE_TESTS: "1"
- name: Generate changelog and determine version
id: release
run: |
OUTPUT=$(node scripts/generate-changelog.mjs)
echo "$OUTPUT" | jq .
echo "version=$(echo "$OUTPUT" | jq -r '.newVersion')" >> "$GITHUB_OUTPUT"
echo "$OUTPUT" | jq -r '.changelogEntry' > /tmp/changelog-entry.md
echo "$OUTPUT" | jq -r '.releaseNotes' > /tmp/release-notes.md
- name: Bump version and sync packages
env:
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: node scripts/bump-version.mjs "$RELEASE_VERSION"
- name: Validate package files after version bump
run: |
node -e "require('./package.json')" && \
node -e "require('./packages/pi-coding-agent/package.json')" && \
node -e "require('./pkg/package.json')" && \
echo "All package.json files are valid"
- name: Update CHANGELOG.md
run: node scripts/update-changelog.mjs /tmp/changelog-entry.md
- name: Commit and tag release
env:
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add package.json package-lock.json web/package-lock.json CHANGELOG.md rust-engine/npm/*/package.json pkg/package.json packages/*/package.json
git commit -m "release: v${RELEASE_VERSION}"
git pull --rebase origin main
git tag "v${RELEASE_VERSION}"
- name: Build release
run: npm run build
- name: Publish release to npm @latest
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
OUTPUT=$(npm publish 2>&1) && echo "$OUTPUT" || {
if echo "$OUTPUT" | grep -q "cannot publish over the previously published"; then
echo "Version already published — promoting to latest"
npm dist-tag add "singularity-forge@${RELEASE_VERSION}" latest
else
echo "$OUTPUT"
exit 1
fi
}
- name: Push release commit and tag
env:
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
git push origin main
git push origin "v${RELEASE_VERSION}"
- name: Create GitHub Release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
gh release create "v${RELEASE_VERSION}" \
--title "v${RELEASE_VERSION}" \
--notes-file /tmp/release-notes.md \
--latest
- name: Post to Discord
  # FIX: a step's own `env:` block is NOT visible in that step's `if:`
  # expression, so the previous guard `env.DISCORD_WEBHOOK != ''` always
  # compared an unset value and the step never ran. Check the secret
  # directly (the `secrets` context IS available in step-level `if`).
  if: ${{ secrets.DISCORD_CHANGELOG_WEBHOOK != '' }}
  env:
    DISCORD_WEBHOOK: ${{ secrets.DISCORD_CHANGELOG_WEBHOOK }}
    RELEASE_VERSION: ${{ steps.release.outputs.version }}
  run: |
    # FIX: build the payload inside jq so "\n" is a real newline. In a
    # double-quoted shell string "\n" is a literal backslash-n, which jq
    # --arg then escaped and Discord rendered verbatim.
    jq -n --arg v "$RELEASE_VERSION" --rawfile notes /tmp/release-notes.md \
      '{content: ("**SF v\($v) Released**\n\n" + $notes + "\n\n`npm i singularity-forge@\($v)`")}' \
      | curl -s -X POST "$DISCORD_WEBHOOK" -H "Content-Type: application/json" -d @-
# Docker publish disabled — no ghcr.io package configured yet
# - name: Log in to GHCR
# uses: docker/login-action@v4
# with:
# registry: ghcr.io
# username: ${{ github.actor }}
# password: ${{ secrets.GITHUB_TOKEN }}
#
# - name: Build and push release Docker image
# env:
# RELEASE_VERSION: ${{ steps.release.outputs.version }}
# run: |
# docker build --target runtime \
# -t ghcr.io/singularity-ng/singularity-forge:latest \
# -t "ghcr.io/singularity-ng/singularity-forge:${RELEASE_VERSION}" \
# .
# docker push "ghcr.io/singularity-ng/singularity-forge:${RELEASE_VERSION}"
# docker push ghcr.io/singularity-ng/singularity-forge:latest
- name: Open back-merge PR main→next if behind
env:
GH_TOKEN: ${{ secrets.RELEASE_PAT }}
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
if ! git ls-remote --exit-code --heads origin next >/dev/null 2>&1; then
echo "next branch does not exist yet; skipping back-merge"
exit 0
fi
git fetch origin next main
BEHIND=$(git rev-list --count origin/next..origin/main)
if [ "$BEHIND" -gt 0 ]; then
BRANCH="backmerge/main-to-next-v${RELEASE_VERSION}"
git checkout -B "$BRANCH" origin/main
git push origin "$BRANCH" --force-with-lease
gh pr create --base next --head "$BRANCH" \
--title "chore: back-merge main to next (v${RELEASE_VERSION})" \
--body "Sync release commit and version bump from main into next." || true
else
echo "next is up to date with main; no back-merge needed"
fi

111
.github/workflows/version-check.yml vendored Normal file
View file

@ -0,0 +1,111 @@
name: Version Check
on:
issues:
types: [opened, edited]
permissions:
issues: write
jobs:
check-version:
if: ${{ github.event_name == 'issues' && contains(github.event.issue.body, 'SF version') }}
runs-on: ubuntu-latest
steps:
- name: Check SF version and comment if outdated
uses: actions/github-script@v7
with:
script: |
const body = context.payload.issue.body || '';
const issueNumber = context.payload.issue.number;
const match = body.match(/###\s+SF version\s*\n+\s*([^\s\n]+)/i);
if (!match) {
core.info('Could not find a SF version value in the issue body - skipping.');
return;
}
const reportedVersion = match[1].trim().replace(/^v/, '');
core.info('Reported version: ' + reportedVersion);
const npmResponse = await fetch('https://registry.npmjs.org/singularity-forge/latest');
if (!npmResponse.ok) {
core.setFailed('npm registry request failed: ' + npmResponse.status);
return;
}
const npmData = await npmResponse.json();
const latestVersion = npmData.version;
core.info('Latest version: ' + latestVersion);
function parseVersion(v) {
const parts = v.replace(/^v/, '').split('.').map(Number);
return [parts[0] || 0, parts[1] || 0, parts[2] || 0];
}
function isOutdated(reported, latest) {
const r = parseVersion(reported);
const l = parseVersion(latest);
if (r[0] !== l[0]) return r[0] < l[0];
if (r[1] !== l[1]) return r[1] < l[1];
return r[2] < l[2];
}
if (!isOutdated(reportedVersion, latestVersion)) {
core.info('Version ' + reportedVersion + ' is current - no comment needed.');
return;
}
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
});
const botMarker = '<!-- sf-version-check -->';
const alreadyCommented = comments.data.some(function (c) {
return c.user.type === 'Bot' && c.body.indexOf(botMarker) !== -1;
});
if (alreadyCommented) {
core.info('Version check comment already posted - skipping duplicate.');
return;
}
const lines = [
botMarker,
'',
'Thanks for filing this bug report!',
'',
'It looks like you are running **SF v' + reportedVersion + '**, but the latest release is **v' + latestVersion + '**.',
'',
'Before we investigate further, please upgrade and check whether the issue still occurs:',
'',
'```bash',
'npm install -g singularity-forge@latest',
'sf --version # should print ' + latestVersion,
'```',
'',
'Then re-run your reproduction steps. If the problem persists on **v' + latestVersion + '**, please update the **SF version** field in this issue and let us know.',
'',
'> **Why?** Many bugs are fixed in subsequent releases. Confirming on the latest version keeps the team focused on real, current issues.',
'',
'---',
'*This is an automated check. If you are intentionally pinned to an older version, feel free to explain why and we will continue from there.*',
];
const comment = lines.join('\n');
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
body: comment,
});
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['needs-upgrade'],
});
core.info('Posted upgrade prompt for v' + reportedVersion + ' -> v' + latestVersion);

19
.siftignore Normal file
View file

@ -0,0 +1,19 @@
.git/**
.sf/**
.bg-shell/**
.pytest_cache/**
.venv/**
venv/**
node_modules/**
**/node_modules/**
**/__pycache__/**
*.pyc
*.egg-info/**
build/**
dist/**
target/**
vendor/**
coverage/**
.cache/**
tmp/**
*.log

1
0 Normal file
View file

@ -0,0 +1 @@
.sf/CODEBASE.md:2

20
ARCHITECTURE.md Normal file
View file

@ -0,0 +1,20 @@
# Architecture
This file is the short map of the codebase. Keep it current and compact.
## Purpose
Describe the product, its users, and the job this repository exists to do.
## Codemap
- `src/`: primary implementation.
- `tests/`: behavior and regression coverage.
- `docs/`: durable product, design, plan, reliability, and security context.
## Invariants
- Prefer small, named modules with clear ownership.
- Behavior changes need tests or an explicit eval.
- Keep generated artifacts out of hand-written design docs.
- Update this map when new top-level concepts or directories become important.

1
TODO.md Normal file
View file

@ -0,0 +1 @@
# Raw Dump Inbox

## Eval Candidates

1. Test note for CI mode verification

3
docs/DESIGN.md Normal file
View file

@ -0,0 +1,3 @@
# Design
Record interaction patterns, visual constraints, and design-system usage here.

3
docs/FRONTEND.md Normal file
View file

@ -0,0 +1,3 @@
# Frontend
Record frontend architecture, component ownership, accessibility constraints, and browser support here.

3
docs/PLANS.md Normal file
View file

@ -0,0 +1,3 @@
# Plans
Use this as the index for current and upcoming work. Link detailed plans in `docs/exec-plans/`.

3
docs/PRODUCT_SENSE.md Normal file
View file

@ -0,0 +1,3 @@
# Product Sense
Capture user goals, non-goals, tradeoffs, and examples of good product judgment for this repo.

10
docs/QUALITY_SCORE.md Normal file
View file

@ -0,0 +1,10 @@
# Quality Score
Define what good looks like for this repo. Include fast checks, slow checks, evals, and known blind spots.
Use these principles:
- Make code legible to agents with semantic names and explicit boundaries.
- Prefer small, testable modules over files that require broad context to edit.
- Enforce style, architecture, and reliability rules mechanically where possible.
- Keep a cleanup loop for stale docs, generated artifacts, and accumulated implementation debt.

35
docs/RECORDS_KEEPER.md Normal file
View file

@ -0,0 +1,35 @@
# Records Keeper
The records keeper keeps repo memory ordered after meaningful changes. Run this checklist at milestone close, after architecture changes, after product behavior changes, and whenever docs/source disagree.
Use the `records-keeper` skill for this workflow when SF skills are available. Use `context-doctor` instead when stale state lives under `.sf/` or the memory store.
## Canonical Homes
- Root `AGENTS.md`: short routing map for agents.
- `ARCHITECTURE.md`: short system map, boundaries, invariants, critical flows, and verification.
- `docs/product-specs/`: durable user-facing behavior and product decisions.
- `docs/design-docs/`: durable design and architecture decisions.
- `docs/exec-plans/`: active/completed work plans and technical debt.
- `docs/generated/`: generated references only.
- `docs/records/`: audits, ledgers, and context-gardening outputs.
## Checklist
- Root map is current: `AGENTS.md` points to the right canonical docs and local `AGENTS.md` files.
- Architecture is current: new subsystems, boundaries, invariants, data/state, or critical flows are reflected in `ARCHITECTURE.md`.
- Product specs are current: user-visible behavior changes are reflected in `docs/product-specs/`.
- Execution plans are filed: active work is in `docs/exec-plans/active/`; completed summaries and evidence are in `docs/exec-plans/completed/`.
- Debt is visible: discovered cleanup is listed in `docs/exec-plans/tech-debt-tracker.md`.
- Generated docs are marked: generated material stays under `docs/generated/` or clearly says how to regenerate it.
- Contradictions are resolved: stale docs are updated or marked superseded with links to the source of truth.
- Verification is recorded: changed checks, evals, and commands are listed in the relevant plan or quality document.
## Output
When records work is non-trivial, write a dated note under `docs/records/` with:
- What changed.
- What canonical docs were updated.
- What contradictions were found.
- What remains unresolved.

3
docs/RELIABILITY.md Normal file
View file

@ -0,0 +1,3 @@
# Reliability
Document expected failure modes, recovery paths, observability, and release checks here.

3
docs/SECURITY.md Normal file
View file

@ -0,0 +1,3 @@
# Security
Document trust boundaries, secrets handling, dependency risk, and security review requirements here.

View file

@ -0,0 +1,5 @@
# Core Beliefs
- The repo should explain itself to humans and agents.
- Plans should carry acceptance criteria, falsifiers, and verification commands.
- Architecture should be mechanically checkable where possible.

View file

@ -0,0 +1,3 @@
# Design Docs
Durable design decisions live here. Link active proposals, completed decisions, and rejected alternatives.

View file

@ -0,0 +1,3 @@
# Active Execution Plans
Link active plans here. Each plan should state purpose, scope, tasks, acceptance criteria, and verification.

View file

@ -0,0 +1,3 @@
# Completed Execution Plans
Move finished plan summaries here with evidence links and follow-up debt.

View file

@ -0,0 +1,3 @@
# Tech Debt Tracker
Track cleanup discovered during implementation. Include owner, impact, proposed fix, and verification.

View file

@ -0,0 +1,3 @@
# Database Schema
Generated or refreshed schema notes belong here. Do not hand-maintain stale schema copies.

View file

@ -0,0 +1,3 @@
# Product Specs
Durable user-facing behavior, workflows, and product decisions live here.

View file

@ -0,0 +1,3 @@
# New User Onboarding
Describe the first-run experience, success criteria, and failure states when this product has an onboarding flow.

3
docs/records/index.md Normal file
View file

@ -0,0 +1,3 @@
# Records
This folder holds repo-memory audits, decision ledgers, context-gardening notes, and records-keeper outputs.

View file

@ -0,0 +1 @@
Reference slot for design-system guidance intended for LLM consumption.

View file

@ -0,0 +1 @@
Reference slot for Nixpacks deployment/build guidance intended for LLM consumption.

View file

@ -0,0 +1 @@
Reference slot for uv/Python tooling guidance intended for LLM consumption.

View file

@ -0,0 +1,83 @@
import { shortHash } from "../utils/hash.js";
const MAX_TOOL_ARGUMENT_KEY_LENGTH = 256;
const LONG_KEY_PREFIX = "tool_arg_";
/**
 * Type guard: true only for plain objects — non-null, non-array values
 * whose runtime type is "object".
 */
function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Truncate `base` to at most `maxLength` characters; shorter strings are
 * returned unchanged.
 */
function clampKey(base: string, maxLength: number): string {
  if (base.length > maxLength) {
    return base.slice(0, maxLength);
  }
  return base;
}
/**
 * Append "_<n>" suffixes until `base` no longer collides with an entry in
 * `usedKeys`, keeping the result within `maxLength`.
 *
 * FIX: the suffix is appended AFTER clamping the base, reserving room for
 * it. The previous inline loops computed `clampKey(`${base}_${n}`, maxLength)`,
 * which truncated the suffix away entirely when `base.length === maxLength`,
 * yielding the same candidate on every iteration and looping forever.
 */
function resolveKeyCollision(
  base: string,
  maxLength: number,
  usedKeys: Set<string>,
): string {
  let candidate = clampKey(base, maxLength);
  let counter = 0;
  while (usedKeys.has(candidate)) {
    const suffix = `_${counter}`;
    // Always keep at least one character of the base so candidates grow
    // distinct as the counter widens; the loop therefore terminates.
    const room = Math.max(1, maxLength - suffix.length);
    candidate = clampKey(base, room) + suffix;
    counter += 1;
  }
  return candidate;
}

/**
 * Produce a serialization-safe key for a tool-call argument.
 *
 * - Keys within `maxLength` that are not already taken pass through as-is.
 * - Otherwise the key is replaced by a stable hash-derived alias
 *   (`tool_arg_<hash>`), disambiguated with numeric suffixes on collision.
 * - `seen` caches key -> alias so the same long key maps consistently
 *   across repeated occurrences.
 *
 * This consolidates the three near-identical collision loops of the
 * previous implementation into {@link resolveKeyCollision} and fixes its
 * potential infinite loop (see above).
 */
function makeSafeKey(
  key: string,
  maxLength: number,
  usedKeys: Set<string>,
  seen: Map<string, string>,
): string {
  if (key.length <= maxLength && !usedKeys.has(key)) {
    return key;
  }
  // Prefer a previously assigned alias for this key; otherwise derive a
  // fresh hash-based base name.
  const base =
    seen.get(key) ?? clampKey(`${LONG_KEY_PREFIX}${shortHash(key)}`, maxLength);
  const candidate = resolveKeyCollision(base, maxLength, usedKeys);
  seen.set(key, candidate);
  return candidate;
}
/**
 * Recursively rewrite tool-call argument keys so every object key is
 * serialization-safe: bounded to `maxKeyLength` and unique within its
 * object. Arrays are mapped element-wise; primitives pass through untouched.
 */
export function sanitizeToolCallArgumentsForSerialization(
  args: unknown,
  maxKeyLength = MAX_TOOL_ARGUMENT_KEY_LENGTH,
): unknown {
  if (Array.isArray(args)) {
    return args.map((item) =>
      sanitizeToolCallArgumentsForSerialization(item, maxKeyLength),
    );
  }
  if (!isObject(args)) {
    return args;
  }
  const result: Record<string, unknown> = {};
  const taken = new Set<string>();
  const aliases = new Map<string, string>();
  for (const [rawKey, rawValue] of Object.entries(args)) {
    const safeKey = makeSafeKey(rawKey, maxKeyLength, taken, aliases);
    taken.add(safeKey);
    result[safeKey] = sanitizeToolCallArgumentsForSerialization(
      rawValue,
      maxKeyLength,
    );
  }
  return result;
}

Binary file not shown.

146
src/errors.ts Normal file
View file

@ -0,0 +1,146 @@
/**
* errors.ts Structured error types for consistent, actionable CLI diagnostics.
*
* Purpose: every error path in the CLI and headless orchestrator should be
* able to emit context that helps users (and future debuggers) understand
* *what* failed, *where*, and *what to try next* without depending on
* heavy error-handling libraries.
*
* Consumer: cli.ts, headless.ts, and any extension that surfaces user-facing
* failures. The types are plain data so they serialize cleanly to stderr,
* JSON batch output, and trace spans.
*/
// ---------------------------------------------------------------------------
// Core structured error type
// ---------------------------------------------------------------------------
/**
* A user-facing or machine-readable error record with rich context.
*
* All fields are optional except `message` so that call-sites can incrementally
* adopt structured errors without rewriting every catch block at once.
*/
export interface StructuredError {
/** Human-readable description of what went wrong. The only required field,
 * so catch sites can adopt structured errors incrementally. */
message: string;
/** The high-level operation that was in progress when the error occurred,
* e.g. "graph build", "session resume", "model validation". */
operation?: string;
/** The file path most relevant to the failure (the file being read,
* written, or expected). */
file?: string;
/** The line number inside `file` if known (e.g. from a parser error). */
line?: number;
/** Actionable guidance for the user — what to check or try next. */
guidance?: string;
/** Whether retrying the same operation (with the same inputs) might
* succeed, e.g. transient network failures. */
retry?: boolean;
/** The underlying cause, if this error wraps another. Kept as `unknown`
* so callers aren't forced to coerce to Error. */
cause?: unknown;
}
// ---------------------------------------------------------------------------
// Convenience constructors
// ---------------------------------------------------------------------------
/**
* Create a {@link StructuredError} from a message and optional context.
*
* Purpose: reduce boilerplate at catch sites where we want to enrich a raw
* exception with operation/file context before logging or returning it.
*
* Consumer: cli.ts catch blocks, headless.ts event handlers.
*/
/**
 * Build a {@link StructuredError} from a message plus optional context.
 *
 * Keeps catch-site boilerplate low: wrap a raw exception with
 * operation/file details before logging or returning it.
 */
export function error(
  message: string,
  ctx?: Omit<StructuredError, "message">,
): StructuredError {
  const result: StructuredError = { message };
  if (ctx !== undefined) {
    Object.assign(result, ctx);
  }
  return result;
}
// ---------------------------------------------------------------------------
// Formatters
// ---------------------------------------------------------------------------
/**
* Format a {@link StructuredError} as plain text suitable for stderr.
*
* Output shape (fields omitted when undefined):
* [sf] Error: <message>
* Operation: <operation>
* File: <file>:<line>
* Guidance: <guidance>
* Retryable: yes|no
*/
/**
 * Render a {@link StructuredError} as human-readable text for stderr.
 *
 * Output shape (undefined fields are skipped):
 *   [sf] Error: <message>
 *    Operation: <operation>
 *    File: <file>:<line>
 *    Guidance: <guidance>
 *    Retryable: yes|no
 */
export function formatStructuredError(
  err: StructuredError,
  prefix = "[sf]",
): string {
  let text = `${prefix} Error: ${err.message}`;
  if (err.operation) {
    text += `\n Operation: ${err.operation}`;
  }
  if (err.file) {
    const lineSuffix = err.line === undefined ? "" : `:${err.line}`;
    text += `\n File: ${err.file}${lineSuffix}`;
  }
  if (err.guidance) {
    text += `\n Guidance: ${err.guidance}`;
  }
  if (err.retry !== undefined) {
    text += `\n Retryable: ${err.retry ? "yes" : "no"}`;
  }
  return `${text}\n`;
}
/**
* Format a {@link StructuredError} as a JSON object.
*
* Purpose: headless --output-format json mode can embed structured errors
* in the result payload instead of interleaving free-form text on stderr.
*/
/**
 * Serialize a {@link StructuredError} to a plain JSON-safe object.
 *
 * Undefined fields are omitted. A wrapped Error cause is reduced to its
 * name and message; any other cause value is stringified.
 */
export function errorToJson(err: StructuredError): Record<string, unknown> {
  const out: Record<string, unknown> = { message: err.message };
  for (const field of ["operation", "file", "line", "guidance", "retry"] as const) {
    const value = err[field];
    if (value !== undefined) {
      out[field] = value;
    }
  }
  if (err.cause !== undefined) {
    if (err.cause instanceof Error) {
      out.cause = { message: err.cause.message, name: err.cause.name };
    } else {
      out.cause = String(err.cause);
    }
  }
  return out;
}
// ---------------------------------------------------------------------------
// Predicates
// ---------------------------------------------------------------------------
/**
 * Narrow an `unknown` value to a {@link StructuredError}.
 *
 * Purpose: safe type guard at catch boundaries where the thrown value may
 * be a plain Error, a StructuredError, or something else entirely. The only
 * runtime requirement checked is a string `message` property on a non-null
 * object — matching the one required field of the interface.
 */
export function isStructuredError(val: unknown): val is StructuredError {
  if (typeof val !== "object" || val === null) return false;
  const candidate = val as Record<string, unknown>;
  return typeof candidate.message === "string";
}

View file

@ -1046,6 +1046,10 @@ export async function postUnitPreVerification(
s.basePath,
expectedOutput,
plannedFiles,
{
source: s.stagedPendingCommit ? "staged" : "last-commit",
baselineFiles: s.preUnitDirtyFiles,
},
);
if (audit && audit.violations.length > 0) {
const warnings = audit.violations.filter(

View file

@ -51,6 +51,7 @@ import {
readProductionMutationApprovalStatus,
} from "../production-mutation-approval.js";
import { resetEvidence } from "../safety/evidence-collector.js";
import { getDirtyFiles } from "../safety/file-change-validator.js";
import {
cleanupCheckpoint,
createCheckpoint,
@ -1776,6 +1777,15 @@ export async function runUnitPhase(
if (safetyConfig.enabled && safetyConfig.evidence_collection) {
resetEvidence();
}
if (
safetyConfig.enabled &&
safetyConfig.file_change_validation &&
unitType === "execute-task"
) {
s.preUnitDirtyFiles = getDirtyFiles(s.basePath);
} else {
s.preUnitDirtyFiles = [];
}
// Only checkpoint code-executing units (not lifecycle/planning units)
if (
safetyConfig.enabled &&
@ -2320,6 +2330,7 @@ export async function runUnitPhase(
}
s.checkpointSha = null;
}
s.preUnitDirtyFiles = [];
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
}

View file

@ -186,6 +186,8 @@ export class AutoSession {
// ── Safety harness ───────────────────────────────────────────────────────
/** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
checkpointSha: string | null = null;
/** Dirty files captured before the current execute-task unit starts. */
preUnitDirtyFiles: string[] = [];
// ── Deferred commit (Fix 1) ──────────────────────────────────────────────
/**
@ -326,6 +328,7 @@ export class AutoSession {
this.isolationDegraded = false;
this.milestoneMergedInPhases = false;
this.checkpointSha = null;
this.preUnitDirtyFiles = [];
this.stagedPendingCommit = false;
this.pendingCommitTaskContext = null;

View file

@ -0,0 +1,186 @@
/**
* Doc Scaffold Checker validates the agentic docs scaffold is filled in.
*
* Purpose: Mechanical enforcement of harness-engineering principles. After
* bootstrap or milestone close, check that scaffold files contain real content
* beyond the template stubs. Reports findings so the agent knows what needs
 * attention — never blocks, only surfaces.
*
* Consumer: bootstrapProject (after scaffold init), milestone close workflows.
*/
import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
import { join } from "node:path";
export interface ScaffoldCheck {
file: string; // relative path from repo root
status: "ok" | "empty" | "stub" | "missing";
lines: number;
note: string;
}
export interface DocScaffoldReport {
checkedAt: string;
repoRoot: string;
checks: ScaffoldCheck[];
summary: {
total: number;
ok: number;
empty: number;
stub: number;
missing: number;
};
}
/** Files created by ensureAgenticDocsScaffold that should contain real content. */
const SCAFFOLD_FILES = [
// Root routing
"AGENTS.md",
"ARCHITECTURE.md",
// docs/ structure
"docs/AGENTS.md",
"docs/PLANS.md",
"docs/DESIGN.md",
"docs/FRONTEND.md",
"docs/QUALITY_SCORE.md",
"docs/RELIABILITY.md",
"docs/SECURITY.md",
"docs/product-specs/index.md",
"docs/product-specs/new-user-onboarding.md",
"docs/design-docs/index.md",
"docs/design-docs/core-beliefs.md",
"docs/exec-plans/active/index.md",
"docs/exec-plans/completed/index.md",
"docs/exec-plans/tech-debt-tracker.md",
"docs/exec-plans/AGENTS.md",
"docs/records/index.md",
"docs/records/AGENTS.md",
"docs/RECORDS_KEEPER.md",
// src/ and tests/ routing
"src/AGENTS.md",
"tests/AGENTS.md",
] as const;
// Minimum lines considered "real content" vs stub. Template stubs are ~3-8 lines.
const STUB_LINE_COUNT = 10;
// Files that are allowed to stay as stubs (index/placeholder files)
const STUB_ALLOWED = new Set([
"docs/product-specs/index.md",
"docs/design-docs/index.md",
"docs/exec-plans/active/index.md",
"docs/exec-plans/completed/index.md",
"docs/records/index.md",
]);
// Count the lines that carry real content: non-blank and not prefixed with a
// comment/heading marker ("//" or "#"). Markdown headings are deliberately
// excluded — a file of headings alone is still a stub.
function countContentLines(content: string): number {
  let count = 0;
  for (const raw of content.split("\n")) {
    const line = raw.trim();
    if (line.length > 0 && !line.startsWith("//") && !line.startsWith("#")) {
      count += 1;
    }
  }
  return count;
}
// Classify a single scaffold file as ok / empty / stub / missing.
// Never throws: unreadable paths are downgraded to "stub" findings so the
// overall report always completes.
function checkFile(repoRoot: string, relPath: string): ScaffoldCheck {
  const fullPath = join(repoRoot, relPath);
  if (!existsSync(fullPath)) {
    return {
      file: relPath,
      status: "missing",
      lines: 0,
      note: "File does not exist — scaffold not run or was interrupted",
    };
  }
  let content: string;
  try {
    if (statSync(fullPath).isDirectory()) {
      return { file: relPath, status: "stub", lines: 0, note: "Is a directory, expected a file" };
    }
    content = readFileSync(fullPath, "utf-8");
  } catch {
    return { file: relPath, status: "stub", lines: 0, note: "Could not read file" };
  }
  // Non-blank line count is reported; content lines (excluding comments and
  // headings) decide stub-vs-ok.
  const nonBlankLines = content.split("\n").filter((l) => l.trim().length > 0).length;
  if (nonBlankLines === 0) {
    return { file: relPath, status: "empty", lines: 0, note: "File is empty" };
  }
  const contentLines = countContentLines(content);
  if (contentLines < STUB_LINE_COUNT) {
    // Index/placeholder files are allowed to remain stubs.
    const allowed = STUB_ALLOWED.has(relPath);
    return {
      file: relPath,
      status: allowed ? "ok" : "stub",
      lines: nonBlankLines,
      note: allowed
        ? `Stub file (${nonBlankLines} lines) — acceptable for index/placeholder`
        : `Stub file (${nonBlankLines} lines) — needs real content beyond template`,
    };
  }
  return {
    file: relPath,
    status: "ok",
    lines: nonBlankLines,
    note: `Contains ${contentLines} content lines`,
  };
}
/**
* Check all scaffold files in a repo. Returns a structured report.
 * Never throws — all errors are caught and reported as stub/missing.
*/
export function checkDocsScaffold(repoRoot: string): DocScaffoldReport {
  // One check per scaffold file, in declaration order.
  const checks: ScaffoldCheck[] = SCAFFOLD_FILES.map((file) =>
    checkFile(repoRoot, file),
  );
  const tally = (status: ScaffoldCheck["status"]): number =>
    checks.filter((c) => c.status === status).length;
  return {
    checkedAt: new Date().toISOString(),
    repoRoot,
    checks,
    summary: {
      total: checks.length,
      ok: tally("ok"),
      empty: tally("empty"),
      stub: tally("stub"),
      missing: tally("missing"),
    },
  };
}
/**
 * Format a report as human-readable text for logging to stderr.
 *
 * Output shape:
 *   [doc-checker] Scaffold check — <timestamp>
 *     <ok>/<total> files OK · <n> need content · <n> missing · <n> empty
 *     Files needing attention:
 *       [<status>] <file> — <note>
 */
export function formatDocCheckReport(report: DocScaffoldReport): string {
  const lines: string[] = [];
  lines.push(`[doc-checker] Scaffold check — ${report.checkedAt}`);
  lines.push(
    `  ${report.summary.ok}/${report.summary.total} files OK`
      + (report.summary.stub > 0 ? ` · ${report.summary.stub} need content` : "")
      + (report.summary.missing > 0 ? ` · ${report.summary.missing} missing` : "")
      + (report.summary.empty > 0 ? ` · ${report.summary.empty} empty` : ""),
  );
  const issues = report.checks.filter((c) => c.status !== "ok");
  if (issues.length > 0) {
    lines.push("  Files needing attention:");
    for (const issue of issues) {
      // Bug fix: file and note were previously concatenated with no
      // separator, producing e.g. "docs/DESIGN.mdStub file (3 lines)…".
      lines.push(`    [${issue.status}] ${issue.file} — ${issue.note}`);
    }
  } else {
    lines.push("  All scaffold files contain real content.");
  }
  return lines.join("\n");
}

View file

@ -24,15 +24,19 @@ All relevant context has been preloaded below — the roadmap, all slice summari
### Step 1 — Dispatch Parallel Reviewers
Call `subagent` with `tasks: [...]` containing ALL THREE reviewers simultaneously:
Call `subagent` with `tasks: [...]` containing ALL THREE reviewers simultaneously.
Use `agent: "reviewer"` for every validation reviewer. Do not use `code`, `coder`, or `worker` here — this is review/validation work, not implementation.
**Reviewer A — Requirements Coverage**
Agent: `reviewer`
Prompt: "Review milestone {{milestoneId}} requirements coverage. Working directory: {{workingDirectory}}. Read `.sf/{{milestoneId}}/REQUIREMENTS.md` (or equivalent requirements file). For each requirement, check the slice SUMMARY files in `.sf/{{milestoneId}}/` to determine if it is: COVERED (clearly demonstrated), PARTIAL (mentioned but not fully demonstrated), or MISSING (no evidence). Output a markdown table with columns: Requirement | Status | Evidence. End with a one-line verdict: PASS if all covered, NEEDS-ATTENTION if partials exist, FAIL if any missing."
**Reviewer B — Cross-Slice Integration**
Agent: `reviewer`
Prompt: "Review milestone {{milestoneId}} cross-slice integration. Working directory: {{workingDirectory}}. Read `{{roadmapPath}}` and find the boundary map (produces/consumes contracts). For each boundary, check that the producing slice's SUMMARY confirms it produced the artifact, and the consuming slice's SUMMARY confirms it consumed it. Output a markdown table: Boundary | Producer Summary | Consumer Summary | Status. End with a one-line verdict: PASS if all boundaries honored, NEEDS-ATTENTION if any gaps."
**Reviewer C — Assessment & Acceptance Criteria**
Agent: `reviewer`
Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.sf/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for ASSESSMENT files in each slice directory. Verify each acceptance criterion maps to either a passing assessment result or clear SUMMARY evidence. Then review the inlined milestone verification classes from planning. For each non-empty planned class, output a markdown table: Class | Planned Check | Evidence | Verdict. Use the exact class names `Contract`, `Integration`, `Operational`, and `UAT` whenever those classes are present. If no verification classes were planned, say that explicitly. Output two sections: `Acceptance Criteria` with a checklist `[ ] Criterion | Evidence`, and `Verification Classes` with the table. End with a one-line verdict: PASS if all criteria and verification classes are covered, NEEDS-ATTENTION if gaps exist."
### Step 2 — Synthesize Findings

View file

@ -1,10 +1,11 @@
/**
* Post-unit file change validator for auto-mode safety harness.
* Compares actual git diff against the task plan's expected output files.
* Compares actual file changes against the task plan's expected output files.
*
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
* Compares against git diff HEAD~1 --name-only after auto-commit.
* Defaults to git diff HEAD~1 --name-only after auto-commit. Deferred-commit
* flows can instead validate the staged diff before the commit is created.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
@ -26,9 +27,15 @@ export interface FileChangeAudit {
actualFiles: string[];
unexpectedFiles: string[];
missingFiles: string[];
ignoredBaselineFiles: string[];
violations: FileViolation[];
}
export interface FileChangeValidationOptions {
source?: "last-commit" | "staged";
baselineFiles?: readonly string[];
}
// ─── Public API ─────────────────────────────────────────────────────────────
/**
@ -43,34 +50,45 @@ export function validateFileChanges(
basePath: string,
expectedOutput: string[],
plannedFiles: string[],
options: FileChangeValidationOptions = {},
): FileChangeAudit | null {
const allExpected = new Set([...expectedOutput, ...plannedFiles]);
// If no expected files were planned, skip validation
if (allExpected.size === 0) return null;
// Get actual changed files from last commit
const actualFiles = getChangedFilesFromLastCommit(basePath);
const source = options.source ?? "last-commit";
const actualFiles =
source === "staged"
? getChangedFilesFromStagedDiff(basePath)
: getChangedFilesFromLastCommit(basePath);
if (!actualFiles) return null;
// Filter out .sf/ internal files — only validate project source files
const projectFiles = actualFiles.filter(
(f) => !f.startsWith(".sf/") && !f.startsWith(".sf\\"),
);
const baselineFiles = new Set(
(options.baselineFiles ?? []).map(normalizeProjectPath),
);
const validationFiles = projectFiles
.map(normalizeProjectPath)
.filter((f) => !baselineFiles.has(f));
const ignoredBaselineFiles = projectFiles
.map(normalizeProjectPath)
.filter((f) => baselineFiles.has(f));
// Normalize expected paths (strip leading ./ or /)
const normalizedExpected = new Set(
[...allExpected].map((f) =>
normalizePlannedFileReference(f).replace(/^\.\//, "").replace(/^\//, ""),
),
[...allExpected].map((f) => normalizeProjectPath(normalizePlannedFileReference(f))),
);
// Compute symmetric difference
const unexpectedFiles = projectFiles.filter(
const unexpectedFiles = validationFiles.filter(
(f) => !normalizedExpected.has(f),
);
const missingFiles = [...normalizedExpected].filter(
(f) => !projectFiles.includes(f),
(f) => !validationFiles.includes(f),
);
const violations: FileViolation[] = [];
@ -93,15 +111,50 @@ export function validateFileChanges(
return {
expectedFiles: [...normalizedExpected],
actualFiles: projectFiles,
actualFiles: validationFiles,
unexpectedFiles,
missingFiles,
ignoredBaselineFiles,
violations,
};
}
/**
 * Capture the dirty-file baseline at unit start. Post-unit validation uses this
 * to avoid warning on files that were already dirty before the task ran.
 *
 * Returns normalized repo-relative paths (leading "./" or "/" stripped). On
 * any git failure the baseline degrades to an empty list — validation then
 * treats every changed file as task-produced, the safe (noisier) direction.
 */
export function getDirtyFiles(basePath: string): string[] {
  try {
    // `--porcelain=v1` yields stable "XY <path>" lines; `--untracked-files=all`
    // lists each untracked file individually instead of collapsing directories.
    const result = execFileSync(
      "git",
      ["status", "--porcelain=v1", "--untracked-files=all"],
      { cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
    ).trim();
    if (!result) return [];
    return result
      .split("\n")
      // Drop the two status letters plus the separating space.
      .map((line) => line.slice(3).trim())
      .map((file) => {
        // Rename entries look like "old -> new"; keep the destination path.
        // NOTE(review): a filename that itself contains " -> " would be
        // truncated here, and git's C-style quoting of special-character
        // paths is not unescaped — likely acceptable for a best-effort
        // baseline, but worth confirming.
        const renamed = file.split(" -> ");
        return renamed[renamed.length - 1] ?? file;
      })
      .filter(Boolean)
      .map(normalizeProjectPath);
  } catch (e) {
    // Surface the failure but never block the unit on baseline capture.
    logWarning(
      "safety",
      `git status failed in file-change-validator: ${(e as Error).message}`,
    );
    return [];
  }
}
// ─── Internals ──────────────────────────────────────────────────────────────
// Strip a leading "./" and then a leading "/" so paths compare as
// repo-relative regardless of how the planner or git spelled them.
function normalizeProjectPath(file: string): string {
  const withoutDotSlash = file.startsWith("./") ? file.slice(2) : file;
  return withoutDotSlash.startsWith("/")
    ? withoutDotSlash.slice(1)
    : withoutDotSlash;
}
function getChangedFilesFromLastCommit(basePath: string): string[] | null {
try {
const result = execFileSync(
@ -118,3 +171,20 @@ function getChangedFilesFromLastCommit(basePath: string): string[] | null {
return null;
}
}
/**
 * List files in the staged (index) diff, normalized to repo-relative paths.
 * Deferred-commit flows use this to validate changes before the commit
 * exists. Returns null — meaning "skip validation" — when git itself fails,
 * as opposed to [] which means "nothing staged".
 */
function getChangedFilesFromStagedDiff(basePath: string): string[] | null {
  try {
    const result = execFileSync(
      "git",
      ["diff", "--name-only", "--cached"],
      { cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
    ).trim();
    return result ? result.split("\n").filter(Boolean).map(normalizeProjectPath) : [];
  } catch (e) {
    logWarning(
      "safety",
      `git staged diff failed in file-change-validator: ${(e as Error).message}`,
    );
    return null;
  }
}

View file

@ -0,0 +1,223 @@
---
name: researcher
description: Researches codebase, project state, and external knowledge using local search, SF database queries, and web search. Use when investigating an unfamiliar subsystem, understanding project milestones and requirements, or gathering evidence for planning. Produces structured research reports.
---
<objective>
Research a topic using four complementary information sources, in priority order:
1. **Serena MCP** (46 LSP-backed tools: symbol search, file read, find references, pattern search) — use FIRST for code exploration
2. **sift** (hybrid BM25+vector local search) — use when Serena symbol search isn't enough
3. **SF project database** (sqlite3) — use for project state (milestones, requirements, decisions)
4. **Web search** — use for external documentation and current information
This skill is the first step before planning — it produces the evidence base that drives good decisions. Without research, agents plan from assumptions; with this skill, they plan from evidence.
</objective>
<quick_start>
**Serena MCP (code intelligence — USE FIRST for code exploration):**
```bash
# Discover Serena tools (46 tools available)
mcp_servers
# Get Serena's full tool list
mcp_discover server=serena
# Symbol search — find where a function/type is defined
mcp_call server=serena tool=find_symbol arguments={contextLines=5,matchPattern="resolveSubagentLaunchSpec"}
# Find all references to a symbol (callers, usages)
mcp_call server=serena tool=find_referencing_symbols arguments={contextLines=3,matchPattern="resolveSubagentLaunchSpec"}
# Read a file (Serena's LSP-backed read is faster than bash cat)
mcp_call server=serena tool=read_file arguments={file_path="src/resources/extensions/subagent/index.ts"}
# Search for pattern in files
mcp_call server=serena tool=search_for_pattern arguments={pattern="call_scout",filePattern="*.ts",contextLines=3}
# List directory
mcp_call server=serena tool=list_dir arguments={path="src/resources/extensions/sf/skills/"}
```
**Local code search — sift (hybrid BM25+vector search):**
```bash
sift search --strategy path-hybrid "authentication middleware"
sift search --strategy hybrid --limit 5 "where is the write gate registered"
```
**SF project database queries:**
```bash
# Current milestone and slices
sqlite3 .sf/sf.db "SELECT id, title, status FROM milestones WHERE status='active'"
# All requirements
sqlite3 .sf/sf.db "SELECT id, class, status, description FROM requirements"
# Recent decisions
sqlite3 .sf/sf.db "SELECT id, scope, decision FROM decisions ORDER BY seq DESC LIMIT 10"
# Tasks by slice
sqlite3 .sf/sf.db "SELECT id, title, status FROM tasks WHERE milestone_id='M001' AND slice_id='S01'"
```
**Web search — use the search-the-web tool directly for current information.**
</quick_start>
<workflow>
## Step 1: Clarify the research goal
Before searching, identify what you need to know:
- **Code exploration** (finding functions, types, references) → use Serena MCP first
- **Project state** (milestones, slices, tasks, requirements) → query the SF DB
- **Current external information** → use web search
- **All of the above** → combine all four sources
## Step 2: Explore code with Serena MCP (priority)
Serena is an LSP-backed code intelligence layer. Use `mcp_call` to invoke its tools:
```bash
# Find where a function or type is defined
mcp_call server=serena tool=find_symbol arguments={matchPattern="MyFunction",contextLines=5}
# Find all callers/references to a symbol
mcp_call server=serena tool=find_referencing_symbols arguments={matchPattern="MyFunction",contextLines=3}
# Read a specific file
mcp_call server=serena tool=read_file arguments={file_path="src/my-file.ts"}
# Grep-like search across the codebase
mcp_call server=serena tool=search_for_pattern arguments={pattern="TODO.*auth",filePattern="*.ts"}
```
## Step 3: Supplement with sift (when Serena isn't enough)
Use sift when you need semantic/hybrid search across unstructured content:
```bash
# Hybrid search for conceptual matches
sift search --strategy hybrid --limit 5 "authentication middleware token validation"
```
## Step 4: Query the SF project database
The SF database (`.sf/sf.db`) contains the canonical project state:
```bash
# List active milestones with their slices
sqlite3 .sf/sf.db "
SELECT m.id, m.title, m.status, s.id, s.title, s.status
FROM milestones m
LEFT JOIN slices s ON s.milestone_id = m.id
WHERE m.status IN ('active','planning')
ORDER BY m.id, s.id
"
# Get requirements by status
sqlite3 .sf/sf.db "SELECT id, class, status, description FROM requirements WHERE status='active'"
# Recent decisions (most recent first)
sqlite3 .sf/sf.db "SELECT id, scope, decision, choice FROM decisions ORDER BY seq DESC LIMIT 20"
# Blocked or pending tasks
sqlite3 .sf/sf.db "SELECT id, title, status FROM tasks WHERE status IN ('blocked','pending')"
# Artifacts (plans, summaries) for a milestone
sqlite3 .sf/sf.db "SELECT path, artifact_type FROM artifacts WHERE milestone_id='M001'"
```
## Step 5: Web search for external information
Use `search-the-web` for documentation, tutorials, or current best practices:
```bash
search_the_web "Next.js 15 app router migration guide"
```
## Step 6: Synthesize into a research report
Write findings to the appropriate artifact:
- Milestone research → `.sf/milestones/{mid}/{mid}-RESEARCH.md`
- Slice research → `.sf/milestones/{mid}/slices/{sid}/{sid}-RESEARCH.md`
- Ad-hoc research → `.sf/research/{topic}.md`
**Research report structure:**
```markdown
# Research: {topic}
## Goal
What question are we answering?
## SF Project State
What does the SF DB say? (milestones, requirements, decisions relevant to this topic)
## Codebase Evidence
What did sift find? (key file:line references)
## External Knowledge
What did web search reveal?
## Findings
Bullet points of the most important discoveries
## Gaps
What is still unknown or needs verification?
## Recommendations
What should the agent do next?
```
</workflow>
<success_criteria>
- Research report written to the correct artifact path
- At least one SF DB query executed and cited
- At least one sift search executed and cited
- Findings are specific (file:line or table:row references), not generic
- Gaps identified honestly — what you could not determine
</success_criteria>
<reference_guides>
### Useful SF DB queries
```sql
-- All milestones with completion status
SELECT id, title, status, completed_at FROM milestones ORDER BY id;
-- All slices for a milestone
SELECT id, title, status, risk FROM slices WHERE milestone_id='M001' ORDER BY sequence;
-- Tasks with verification status
SELECT t.id, t.title, t.status, t.verification_status
FROM tasks t WHERE t.milestone_id='M001' AND t.slice_id='S01';
-- Open requirements
SELECT id, class, description FROM requirements WHERE status IN ('active','pending');
-- Decisions by scope
SELECT id, scope, decision FROM decisions WHERE scope='architecture' ORDER BY seq DESC;
-- Memory entries
SELECT id, category, content FROM memories ORDER BY seq DESC LIMIT 20;
```
### sift strategies
| Strategy | When to use |
|---|---|
| `path-hybrid` | Default. File path + content matching — best for most queries |
| `hybrid` | Pure content matching — when you don't care about file names |
| `page-index-hybrid` | Web-page-like content (documentation) |
| `bm25` | Exact keyword matching — fast fallback |
### DB schema reference
- `milestones` — id, title, status, vision, success_criteria (JSON), completed_at
- `slices` — milestone_id, id, title, status, risk, goal, success_criteria
- `tasks` — milestone_id, slice_id, id, title, status, one_liner, narrative, verification_result
- `requirements` — id, class, status, description, why, source, primary_owner, validation
- `decisions` — seq, id, scope, decision, choice, rationale, revisable, made_by
- `artifacts` — path, artifact_type, milestone_id, slice_id, task_id
- `memories` — id, category, content, confidence
</reference_guides>

View file

@ -48,6 +48,8 @@ const COLLAPSED_ITEM_COUNT = 10;
const liveSubagentProcesses = new Set<ChildProcess>();
const AGENT_ALIASES: Record<string, string> = {
default: "worker",
code: "reviewer",
coder: "typescript-pro",
["g" + "sd-executor"]: "worker",
"sf-worker": "worker",
"sf-scout": "scout",

View file

@ -0,0 +1,34 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import test from "node:test";
import { fileURLToPath } from "node:url";
// Resolve the subagent extension source relative to this test file so the
// assertions below can inspect it as text. These are source-contract tests:
// they pin implementation details of the launcher rather than executing it.
const __dirname = dirname(fileURLToPath(import.meta.url));
const subagentSrc = readFileSync(join(__dirname, "..", "index.ts"), "utf-8");
test("subagent launcher resolves Node command specs instead of shelling through bash", () => {
  // The launcher must expose an explicit spec-resolution function...
  assert.match(subagentSrc, /function resolveSubagentLaunchSpec\(/);
  // ...that picks the Node binary from SF_NODE_BIN or the current process.
  assert.match(
    subagentSrc,
    /command = process\.env\.SF_NODE_BIN \|\| process\.execPath/,
  );
  // Regression guard: no `bash -lc` shell-wrapping anywhere in the source.
  assert.doesNotMatch(subagentSrc, /bash -lc/);
});
test("normal subagent execution spawns the resolved Node command with argv array", () => {
  // spawn() must receive (command, argv[]) so arguments are never re-parsed
  // by a shell, and shell mode must be explicitly disabled.
  assert.match(
    subagentSrc,
    /spawn\(\s*launchSpec\.command,\s*\[\.\.\.extensionArgs,\s*\.\.\.launchSpec\.args\]/,
  );
  assert.match(subagentSrc, /shell:\s*false/);
});
test("cmux launcher writes only explicit environment patch, not the full process env", () => {
  assert.match(subagentSrc, /function writeNodeSubagentLauncher\(/);
  // The generated launcher must serialize only the explicit env patch...
  assert.match(
    subagentSrc,
    /const env = \{ \.\.\.process\.env, \.\.\.\$\{JSON\.stringify\(launchSpec\.envPatch\)\} \}/,
  );
  // ...never a full captured environment object.
  assert.doesNotMatch(subagentSrc, /JSON\.stringify\(launchSpec\.env\)/);
});

View file

@ -0,0 +1,12 @@
{
"id": "vectordrive",
"name": "VectorDrive",
"version": "1.0.0",
"description": "Native vector database integration via vectordrive (Rust-based, in-process)",
"tier": "bundled",
"requires": { "platform": ">=2.71.0" },
"provides": {
"tools": ["vectordrive_info", "vectordrive_store", "vectordrive_search"],
"hooks": ["session_start", "session_shutdown"]
}
}

View file

@ -0,0 +1,36 @@
/**
* VectorDrive Extension for Singularity Forge
*
* Integrates the native Rust vectordrive vector database for semantic
* memory and code search. Works offline with no external services.
*/
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager } from "./manager.js";
import { registerVectordriveInfoTool } from "./tool-info.js";
import { registerVectordriveStoreTool } from "./tool-store.js";
import { registerVectordriveSearchTool } from "./tool-search.js";
// Extension entry point: registers the vectordrive tools and wires the
// session lifecycle hooks declared in extension.json.
export default function (pi: ExtensionAPI) {
  const registrars = [
    registerVectordriveInfoTool,
    registerVectordriveStoreTool,
    registerVectordriveSearchTool,
  ];
  for (const register of registrars) {
    register(pi);
  }
  // Pre-warm the connection on session start; in UI sessions, surface a
  // warning when the native backend is unavailable.
  pi.on("session_start", async (_event, ctx) => {
    const status = await VectordriveManager.getInstance().getStatus();
    if (ctx.hasUI && status.backend === "none" && status.error) {
      ctx.ui.notify(
        `VectorDrive unavailable: ${status.error}`,
        "warning",
      );
    }
  });
  // Release cached handles when the session ends.
  pi.on("session_shutdown", async () => {
    await VectordriveManager.getInstance().close();
  });
}

View file

@ -0,0 +1,205 @@
/**
 * VectorDrive Manager — singleton wrapping the native vectordrive VectorDb.
*
* Loads the `vectordrive` npm package dynamically (optional dependency),
* creates a persisted VectorDb in `.sf/vectordrive/`, and exposes status
* and search/store operations with graceful degradation.
*/
import { mkdirSync } from "node:fs";
import { dirname } from "node:path";
export type VectorBackend = "vectordrive" | "none";
export interface VectordriveStatus {
backend: VectorBackend;
version: string | null;
implementation: string | null;
initialized: boolean;
vectorCount: number;
error: string | null;
dbPath: string | null;
}
export interface VectorEntry {
id: string;
vector: number[];
metadata?: Record<string, unknown>;
}
export interface SearchResult {
id: string;
score: number;
metadata?: Record<string, unknown>;
}
const DB_DIR = ".sf/vectordrive";
const DB_PATH = `${DB_DIR}/forge.vectors`;
const DIMENSIONS = 384;
// Resolve the on-disk database location. Prefers POSIX HOME, falls back to
// Windows USERPROFILE, then to a CWD-relative path. Uses `||` deliberately so
// an empty HOME also falls through.
// NOTE(review): this anchors the store under $HOME rather than the repo root,
// although the module doc mentions `.sf/vectordrive/` — confirm intent.
function getDbPath(): string {
  const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
  return `${homeDir}/${DB_PATH}`;
}
// Best-effort creation of a file's parent directory. Failures are swallowed:
// they will surface later, with better context, when the DB opens the file.
function ensureDir(path: string): void {
  const parent = dirname(path);
  try {
    mkdirSync(parent, { recursive: true });
  } catch {
    // ignore — see note above
  }
}
/**
 * Simple text→vector fallback when no embedding model is available.
 * Folds UTF-16 code units into the vector round-robin, then L2-normalizes.
 * Deterministic for a given (text, dimensions) pair; the zero vector is
 * returned unscaled for empty/whitespace-only input.
 */
export function textToVector(text: string, dimensions: number = DIMENSIONS): number[] {
  const vec: number[] = new Array(dimensions).fill(0);
  const chars = text.toLowerCase().trim();
  for (let i = 0; i < chars.length; i += 1) {
    vec[i % dimensions] += chars.charCodeAt(i) / 65535;
  }
  const sumSquares = vec.reduce((acc, component) => acc + component * component, 0);
  const magnitude = Math.sqrt(sumSquares);
  return magnitude > 0 ? vec.map((component) => component / magnitude) : vec;
}
/**
 * Process-wide singleton around the optional native `vectordrive` VectorDb.
 *
 * Lifecycle: the first getStatus() call probes for the package and caches the
 * result — success or failure — until close() resets all cached state.
 * Every operation degrades gracefully when the backend is unavailable:
 * store/delete return false, search returns [], getDb() returns null.
 */
export class VectordriveManager {
  private static instance: VectordriveManager;
  // Cached probe result; `initialized: true` marks it final until close().
  private status: VectordriveStatus | null = null;
  // In-flight probe, shared so concurrent getStatus() calls probe only once.
  private initPromise: Promise<VectordriveStatus> | null = null;
  // Native VectorDb handle — untyped because the package is optional.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private db: any | null = null;
  // The imported vectordrive module itself, kept for future API access.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private vd: any | null = null;
  private constructor() {}
  /** Lazily create and return the process-wide singleton. */
  static getInstance(): VectordriveManager {
    if (!VectordriveManager.instance) {
      VectordriveManager.instance = new VectordriveManager();
    }
    return VectordriveManager.instance;
  }
  /** Return the cached status, or run (or join) the probe if none exists. */
  async getStatus(): Promise<VectordriveStatus> {
    if (this.status?.initialized) return this.status;
    if (this.initPromise) return this.initPromise;
    this.initPromise = this.probe();
    return this.initPromise;
  }
  /** The live VectorDb handle, or null when the backend is unavailable. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async getDb(): Promise<any | null> {
    const status = await this.getStatus();
    if (status.backend !== "vectordrive") return null;
    return this.db;
  }
  // Probe for the optional package, open/create the persisted DB, and build
  // the status snapshot. Never throws — every failure path returns a
  // `backend: "none"` status with the error message attached.
  private async probe(): Promise<VectordriveStatus> {
    const dbPath = getDbPath();
    let vectordrive: any | null = null;
    try {
      // Indirect specifier keeps bundlers from statically resolving the
      // optional dependency.
      const modName = "vectordrive";
      vectordrive = await import(modName);
    } catch (err) {
      this.status = {
        backend: "none",
        version: null,
        implementation: null,
        initialized: true,
        vectorCount: 0,
        error: `vectordrive package not installed: ${err instanceof Error ? err.message : String(err)}`,
        dbPath: null,
      };
      return this.status;
    }
    try {
      this.vd = vectordrive;
      ensureDir(dbPath);
      // Tolerate either export casing across package versions.
      const VectorDb = vectordrive.VectorDb || vectordrive.VectorDB;
      if (typeof VectorDb !== "function") {
        throw new Error("vectordrive package does not export VectorDb");
      }
      this.db = new VectorDb({
        dimensions: DIMENSIONS,
        storagePath: dbPath,
        distanceMetric: "cosine",
      });
      const count = (await this.db.len()) as number;
      // NOTE(review): assumes getVersion() returns an object with a
      // `version` field — confirm against the vectordrive API.
      const version = vectordrive.getVersion?.() ?? null;
      const impl = vectordrive.getImplementationType?.() ?? "unknown";
      this.status = {
        backend: "vectordrive",
        version: version?.version ?? null,
        implementation: impl,
        initialized: true,
        vectorCount: count,
        error: null,
        dbPath,
      };
      return this.status;
    } catch (err) {
      this.status = {
        backend: "none",
        version: null,
        implementation: null,
        initialized: true,
        vectorCount: 0,
        error: err instanceof Error ? err.message : String(err),
        dbPath: null,
      };
      return this.status;
    }
  }
  /** Insert one vector entry. Returns false when unavailable or on error. */
  async store(entry: VectorEntry): Promise<boolean> {
    const db = await this.getDb();
    if (!db) return false;
    try {
      await db.insert({
        id: entry.id,
        vector: entry.vector,
        metadata: entry.metadata,
      });
      return true;
    } catch {
      return false;
    }
  }
  /** k-nearest search. Returns [] when unavailable or on error. */
  async search(vector: number[], k: number): Promise<SearchResult[]> {
    const db = await this.getDb();
    if (!db) return [];
    try {
      const results = await db.search({ vector, k });
      return results.map((r: any) => ({
        id: String(r.id),
        score: Number(r.score),
        metadata: r.metadata,
      }));
    } catch {
      return [];
    }
  }
  /** Delete one entry by id. Returns false when unavailable or on error. */
  async delete(id: string): Promise<boolean> {
    const db = await this.getDb();
    if (!db) return false;
    try {
      return await db.delete(id);
    } catch {
      return false;
    }
  }
  // Reset all cached state so the next getStatus() re-probes.
  // NOTE(review): drops references without calling any close/flush on the
  // underlying VectorDb — assumes the native layer needs no explicit
  // shutdown; confirm against vectordrive's API.
  async close(): Promise<void> {
    this.db = null;
    this.vd = null;
    this.status = null;
    this.initPromise = null;
  }
}

View file

@ -0,0 +1,36 @@
import { describe, expect, it } from "vitest";
import { VectordriveManager, textToVector } from "../manager.js";
describe("VectordriveManager", () => {
  it("should return singleton instance", () => {
    const a = VectordriveManager.getInstance();
    const b = VectordriveManager.getInstance();
    expect(a).toBe(b);
  });
  it("should degrade gracefully when vectordrive is not installed", async () => {
    const manager = VectordriveManager.getInstance();
    // close() clears any cached status so getStatus() re-probes here.
    await manager.close();
    // NOTE(review): assumes the optional `vectordrive` package is absent in
    // the test environment — if CI ever installs it, backend becomes
    // "vectordrive" and this test fails; confirm the environment guarantee.
    const status = await manager.getStatus();
    expect(status.initialized).toBe(true);
    expect(status.backend).toBe("none");
    expect(status.error).toBeTruthy();
  });
});
describe("textToVector", () => {
  it("should produce normalized vectors", () => {
    const v = textToVector("hello world", 384);
    expect(v).toHaveLength(384);
    // L2 norm of a non-empty input must be ~1 after normalization.
    const mag = Math.sqrt(v.reduce((s, x) => s + x * x, 0));
    expect(mag).toBeCloseTo(1, 5);
  });
  it("should produce different vectors for different texts", () => {
    const a = textToVector("authentication middleware for express", 384);
    const b = textToVector("database migration helper in python", 384);
    // Cosine similarity (both unit vectors) should not be near-identical.
    const similarity = a.reduce((s, x, i) => s + x * b[i], 0);
    expect(similarity).toBeLessThan(0.95);
  });
});

View file

@ -0,0 +1,68 @@
/**
* VectorDrive Info Tool
*
* Introspects the vectordrive native package status, version, implementation
* type (native vs wasm), and vector count.
*/
import { Type } from "@sinclair/typebox";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager } from "./manager.js";
/** Result envelope returned by extension tool execute() handlers. */
export interface ToolExecutionResult {
  // Text parts sent back to the LLM context.
  content: Array<{ type: "text"; text: string }>;
  // Structured payload used for rendering and stateful follow-ups.
  details: Record<string, unknown>;
  // Marks the call as failed; presumably surfaced to the model/UI — TODO confirm.
  isError?: boolean;
}
/**
 * Register the `vectordrive_info` tool on the extension API.
 *
 * The tool reports the cached VectorDrive status (backend, implementation,
 * version, vector count, db path); `refresh: true` drops the cache first so
 * the status is re-probed.
 */
export function registerVectordriveInfoTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "vectordrive_info",
    label: "VectorDrive Info",
    description:
      "Check VectorDrive native vector database status. " +
      "Returns implementation type (native Rust or WASM), version, " +
      "vector count, and database path.",
    promptSnippet: "Check VectorDrive database status and capabilities",
    parameters: Type.Object({
      refresh: Type.Optional(
        Type.Boolean({
          default: false,
          description: "Force re-probe instead of using cached status",
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      const manager = VectordriveManager.getInstance();
      if (params.refresh) {
        // close() clears cached state so getStatus() probes again.
        await manager.close();
      }
      const status = await manager.getStatus();
      const report = [
        `# VectorDrive Status`,
        "",
        `- **Backend**: ${status.backend}`,
        `- **Implementation**: ${status.implementation ?? "n/a"}`,
        `- **Version**: ${status.version ?? "n/a"}`,
        `- **Vectors**: ${status.vectorCount}`,
        `- **Initialized**: ${status.initialized}`,
      ];
      if (status.dbPath) {
        report.push(`- **DB Path**: ${status.dbPath}`);
      }
      if (status.error) {
        report.push(`- **Error**: ${status.error}`);
      }
      return {
        content: [{ type: "text", text: report.join("\n") }],
        details: { status },
        // A missing backend is reported as a tool error.
        isError: status.backend === "none",
      };
    },
  });
}

View file

@ -0,0 +1,120 @@
/**
* VectorDrive Search Tool
*
* Semantic search over stored vectors. Accepts a pre-computed query vector
* or raw text (auto-embedded). Falls back to metadata keyword matching
* when vectordrive is offline.
*/
import { Type } from "@sinclair/typebox";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager, textToVector } from "./manager.js";
/** Result envelope returned by extension tool execute() handlers. */
export interface ToolExecutionResult {
  // Text parts sent back to the LLM context.
  content: Array<{ type: "text"; text: string }>;
  // Structured payload used for rendering and stateful follow-ups.
  details: Record<string, unknown>;
  // Marks the call as failed; presumably surfaced to the model/UI — TODO confirm.
  isError?: boolean;
}
/**
 * Register the `vectordrive_search` tool on the extension API.
 *
 * Searches VectorDrive by a pre-computed vector (if given) or by embedding
 * the text query with textToVector. Results are rendered as markdown with
 * score, optional text preview, and remaining metadata fields.
 *
 * Fix: metadata values were interpolated with `${v}`, which renders object
 * values (allowed by vectordrive_store's Type.Unknown metadata) as
 * "[object Object]". Non-primitive values are now JSON-stringified.
 */
export function registerVectordriveSearchTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "vectordrive_search",
    label: "VectorDrive Search",
    description:
      "Search VectorDrive by vector similarity or text query. " +
      "Returns the most relevant stored entries with similarity scores. " +
      "When no embedding model is available, a simple hash embedding is used — " +
      "for best results provide pre-computed vectors via vectordrive_store.",
    promptSnippet: "Search VectorDrive memories or code chunks",
    promptGuidelines: [
      "Use vectordrive_search to find previously stored memories, code chunks, or documents.",
      "Be specific with queries for better results.",
      "If you stored code with metadata.file_path, results will include the source location.",
    ],
    parameters: Type.Object({
      query: Type.String({
        description: "Text query to search for (auto-converted to embedding)",
      }),
      vector: Type.Optional(
        Type.Array(Type.Number(), {
          description: "Optional pre-computed query vector. If provided, overrides 'query' text.",
        }),
      ),
      limit: Type.Optional(
        Type.Number({
          default: 10,
          description: "Maximum results (1-50)",
          minimum: 1,
          maximum: 50,
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      const manager = VectordriveManager.getInstance();
      const status = await manager.getStatus();
      // Clamp defensively even though the schema declares minimum/maximum.
      const limit = Math.min(Math.max(params.limit ?? 10, 1), 50);
      if (status.backend === "none") {
        return {
          content: [
            {
              type: "text",
              text: `VectorDrive is unavailable: ${status.error ?? "unknown error"}\n\nInstall with: npm install vectordrive`,
            },
          ],
          details: { status },
          isError: true,
        };
      }
      // An explicit non-empty vector wins; otherwise embed the text query.
      const queryVector =
        params.vector && params.vector.length > 0
          ? params.vector
          : textToVector(params.query);
      const results = await manager.search(queryVector, limit);
      if (results.length === 0) {
        return {
          content: [
            {
              type: "text",
              text: `No results found in VectorDrive for query: "${params.query}"`,
            },
          ],
          details: { query: params.query, count: 0 },
          isError: false,
        };
      }
      const lines: string[] = [];
      lines.push(`# VectorDrive Search Results`);
      lines.push(`Query: "${params.query}"`);
      lines.push("");
      // Render metadata values safely: objects/arrays would otherwise print
      // as "[object Object]" via template interpolation.
      const formatMetaValue = (v: unknown): string =>
        typeof v === "object" && v !== null ? JSON.stringify(v) : String(v);
      for (const r of results) {
        const meta = r.metadata ?? {};
        const preview = meta.text_preview ?? "";
        lines.push(`## ${r.id} (score: ${r.score.toFixed(4)})`);
        if (preview) {
          lines.push("```");
          lines.push(String(preview).slice(0, 400));
          lines.push("```");
        }
        const metaLines = Object.entries(meta)
          .filter(([k]) => k !== "text_preview" && k !== "stored_at")
          .map(([k, v]) => `- ${k}: ${formatMetaValue(v)}`);
        if (metaLines.length > 0) {
          lines.push(...metaLines);
        }
        lines.push("");
      }
      return {
        content: [{ type: "text", text: lines.join("\n") }],
        details: { results, count: results.length },
        isError: false,
      };
    },
  });
}

View file

@ -0,0 +1,116 @@
/**
* VectorDrive Store Tool
*
* Store a vector with metadata in the native VectorDb.
*/
import { Type } from "@sinclair/typebox";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager, textToVector } from "./manager.js";
/** Result envelope returned by extension tool execute() handlers. */
export interface ToolExecutionResult {
  // Text parts sent back to the LLM context.
  content: Array<{ type: "text"; text: string }>;
  // Structured payload used for rendering and stateful follow-ups.
  details: Record<string, unknown>;
  // Marks the call as failed; presumably surfaced to the model/UI — TODO confirm.
  isError?: boolean;
}
/**
 * Register the `vectordrive_store` tool on the extension API.
 *
 * Stores one entry under a caller-supplied id. The embedding is either the
 * provided vector or, failing that, a hash embedding of the raw text. A
 * stored_at timestamp and (when text is given) a 200-char text_preview are
 * merged into the metadata before insert.
 */
export function registerVectordriveStoreTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "vectordrive_store",
    label: "VectorDrive Store",
    description:
      "Store a vector entry in VectorDrive. Accepts either a pre-computed " +
      "vector array or raw text (a simple hash embedding is generated automatically). " +
      "Metadata is stored as JSON and returned in search results.",
    promptSnippet: "Store a memory or code chunk in VectorDrive",
    parameters: Type.Object({
      id: Type.String({
        description: "Unique identifier for this entry (e.g. file-path:line-range)",
      }),
      text: Type.Optional(
        Type.String({
          description: "Raw text content to store. A simple embedding is auto-generated if 'vector' is not provided.",
        }),
      ),
      vector: Type.Optional(
        Type.Array(Type.Number(), {
          description: "Pre-computed embedding vector (384 dimensions). Overrides 'text' if provided.",
        }),
      ),
      metadata: Type.Optional(
        Type.Record(Type.String(), Type.Unknown(), {
          description: "Optional metadata object (e.g. { file_path, line_start, language })",
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      // Every error path returns the same envelope shape.
      const failure = (text: string, details: Record<string, unknown>) => ({
        content: [{ type: "text" as const, text }],
        details,
        isError: true,
      });
      const manager = VectordriveManager.getInstance();
      const status = await manager.getStatus();
      if (status.backend === "none") {
        return failure(
          `VectorDrive is unavailable: ${status.error ?? "unknown error"}\n\nInstall with: npm install vectordrive`,
          { status },
        );
      }
      const id = params.id.trim();
      if (!id) {
        return failure("Error: id is required.", { error: "missing_id" });
      }
      // Prefer an explicit non-empty vector; otherwise derive one from text.
      let vector: number[];
      if (params.vector && params.vector.length > 0) {
        vector = params.vector;
      } else if (params.text) {
        vector = textToVector(params.text);
      } else {
        return failure(
          "Error: either 'text' or 'vector' must be provided.",
          { error: "missing_content" },
        );
      }
      const metadata: Record<string, unknown> = {
        ...(params.metadata ?? {}),
        stored_at: new Date().toISOString(),
      };
      if (params.text) {
        metadata.text_preview = params.text.slice(0, 200);
      }
      const stored = await manager.store({ id, vector, metadata });
      if (!stored) {
        return failure("Error: failed to store entry.", { error: "store_failed" });
      }
      return {
        content: [
          {
            type: "text",
            text: `Stored ${id} (${vector.length} dims).`,
          },
        ],
        details: { id, dimensions: vector.length, metadata },
        isError: false,
      };
    },
  });
}

View file

@ -0,0 +1,93 @@
---
name: create-sf-extension
description: Create, debug, and iterate on SF extensions (TypeScript modules that add tools, commands, event hooks, custom UI, and providers to SF). Use when asked to build an extension, add a tool the LLM can call, register a slash command, hook into SF events, create custom TUI components, or modify SF behavior. Triggers on "create extension", "build extension", "add a tool", "register command", "hook into sf", "custom tool", "sf plugin", "sf extension".
---
<essential_principles>
**Extensions are TypeScript modules** that hook into SF's runtime (built on pi). They export a default function receiving `ExtensionAPI` and use it to subscribe to events, register tools/commands/shortcuts, and interact with the session.
**SF extension paths (community/user-installed extensions):**
- Global: `~/.sf/extensions/*.ts` or `~/.sf/extensions/*/index.ts`
- Project-local: `.sf/extensions/*.ts` or `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package. Community extensions placed there are silently ignored by the loader — do not list it as an install location.
**The three primitives:**
1. **Events** — Listen and react (`pi.on("event", handler)`). Can block tool calls, modify messages, inject context.
2. **Tools** — Give the LLM new abilities (`pi.registerTool()`). LLM calls them autonomously.
3. **Commands** — Give users slash commands (`pi.registerCommand()`). Users type `/mycommand`.
**Non-negotiable rules:**
- Use `StringEnum` from `@singularity-forge/pi-ai` for string enum params (NOT `Type.Union`/`Type.Literal` — breaks Google's API)
- Truncate tool output to 50KB / 2000 lines max (use `truncateHead`/`truncateTail` from `@singularity-forge/pi-coding-agent`)
- Store stateful tool state in `details` for branching support
- Check `signal?.aborted` in long-running tool executions
- Use `pi.exec()` not `child_process` for shell commands
- Check `ctx.hasUI` before dialog methods (non-interactive modes exist)
- Session control methods (`waitForIdle`, `newSession`, `fork`, `navigateTree`, `reload`) are ONLY available in command handlers — they deadlock in event handlers
- Lines from `render()` must not exceed `width` — use `truncateToWidth()`
- Use theme from callback params, never import directly
- Strip leading `@` from path params in custom tools (some models add it)
**Available imports:**
| Package | Purpose |
|---------|---------|
| `@singularity-forge/pi-coding-agent` | `ExtensionAPI`, `ExtensionContext`, `Theme`, event types, tool utilities, `DynamicBorder`, `BorderedLoader`, `CustomEditor`, `highlightCode` |
| `@sinclair/typebox` | `Type.Object`, `Type.String`, `Type.Number`, `Type.Optional`, `Type.Boolean`, `Type.Array` |
| `@singularity-forge/pi-ai` | `StringEnum` (required for string enums), `Type` re-export |
| `@singularity-forge/pi-tui` | `Text`, `Box`, `Container`, `Spacer`, `Markdown`, `SelectList`, `Input`, `matchesKey`, `Key`, `truncateToWidth`, `visibleWidth` |
| Node.js built-ins | `node:fs`, `node:path`, `node:child_process`, etc. |
</essential_principles>
<routing>
Based on user intent, route to the appropriate workflow:
**Building a new extension:**
- "Create an extension", "build a tool", "I want to add a command" → `workflows/create-extension.md`
**Adding capabilities to an existing extension:**
- "Add a tool to my extension", "add event hook", "add custom rendering" → `workflows/add-capability.md`
**Debugging an extension:**
- "My extension doesn't work", "tool not showing up", "event not firing" → `workflows/debug-extension.md`
**If user intent is clear from context, skip the question and go directly to the workflow.**
</routing>
<reference_index>
All domain knowledge in `references/`:
**Core architecture:** extension-lifecycle.md, events-reference.md
**API surface:** extensionapi-reference.md, extensioncontext-reference.md
**Capabilities:** custom-tools.md, custom-commands.md, custom-ui.md, custom-rendering.md
**Patterns:** state-management.md, system-prompt-modification.md, compaction-session-control.md
**Infrastructure:** model-provider-management.md, remote-execution-overrides.md, packaging-distribution.md, mode-behavior.md
**Spec:** `docs/extension-sdk/manifest-spec.md` — manifest format, tiers, validation
**Testing:** `docs/extension-sdk/testing.md` — mock patterns, test conventions
**SDK:** `docs/extension-sdk/` — the authoritative SF extension guide
**Gotchas:** key-rules-gotchas.md
</reference_index>
<workflows_index>
| Workflow | Purpose |
|----------|---------|
| create-extension.md | Build a new extension from scratch |
| add-capability.md | Add tools, commands, hooks, UI to an existing extension |
| debug-extension.md | Diagnose and fix extension issues |
</workflows_index>
<success_criteria>
Extension is complete when:
- `extension-manifest.json` exists with accurate `provides` listing all registered tools/commands/hooks/shortcuts
- TypeScript compiles without errors (jiti handles this at runtime)
- Extension loads on SF startup or `/reload` without errors
- Tools appear in the LLM's system prompt and are callable
- Commands respond to `/command` input
- Event hooks fire at the expected lifecycle points
- Custom UI renders correctly within terminal width
- State persists correctly across session restarts (if stateful)
- Output is truncated to safe limits (if tools produce variable output)
</success_criteria>

View file

@ -0,0 +1,77 @@
<overview>
Custom compaction hooks, triggering compaction, and session control methods available only in command handlers.
</overview>
<custom_compaction>
Override default compaction behavior:
```typescript
pi.on("session_before_compact", async (event, ctx) => {
const { preparation, branchEntries, customInstructions, signal } = event;
// Option 1: Cancel
return { cancel: true };
// Option 2: Custom summary
return {
compaction: {
summary: "Custom summary of conversation so far...",
firstKeptEntryId: preparation.firstKeptEntryId,
tokensBefore: preparation.tokensBefore,
}
};
});
```
</custom_compaction>
<trigger_compaction>
Trigger compaction programmatically from any handler:
```typescript
ctx.compact({
customInstructions: "Focus on the authentication changes",
onComplete: (result) => ctx.ui.notify("Compacted!", "info"),
onError: (error) => ctx.ui.notify(`Failed: ${error.message}`, "error"),
});
```
</trigger_compaction>
<session_control>
**Only available in command handlers** (deadlocks in event handlers):
```typescript
pi.registerCommand("handoff", {
handler: async (args, ctx) => {
await ctx.waitForIdle();
// Create new session with initial context
const result = await ctx.newSession({
parentSession: ctx.sessionManager.getSessionFile(),
setup: async (sm) => {
sm.appendMessage({
role: "user",
content: [{ type: "text", text: `Context: ${args}` }],
timestamp: Date.now(),
});
},
});
if (result.cancelled) { /* extension cancelled via session_before_switch */ }
},
});
```
| Method | Purpose |
|--------|---------|
| `ctx.waitForIdle()` | Wait for agent to finish streaming |
| `ctx.newSession(options?)` | Create a new session |
| `ctx.fork(entryId)` | Fork from a specific entry |
| `ctx.navigateTree(targetId, options?)` | Navigate session tree (with optional summary) |
| `ctx.reload()` | Hot-reload everything (treat as terminal — code after runs pre-reload version) |
`navigateTree` options:
- `summarize: boolean` — generate summary of abandoned branch
- `customInstructions: string` — instructions for summarizer
- `replaceInstructions: boolean` — replace default prompt entirely
- `label: string` — label to attach to branch summary
</session_control>

View file

@ -0,0 +1,139 @@
<overview>
Custom slash commands — registration, argument completions, subcommand patterns, and the extended command context.
</overview>
<basic_registration>
```typescript
pi.registerCommand("deploy", {
description: "Deploy to an environment",
handler: async (args, ctx) => {
// args = everything after "/deploy "
// ctx = ExtensionCommandContext (has session control methods)
ctx.ui.notify(`Deploying to ${args || "production"}`, "info");
},
});
```
</basic_registration>
<argument_completions>
Add tab-completion for command arguments:
```typescript
import type { AutocompleteItem } from "@singularity-forge/pi-tui";
pi.registerCommand("deploy", {
description: "Deploy to an environment",
getArgumentCompletions: (prefix: string): AutocompleteItem[] | null => {
const envs = ["dev", "staging", "prod"];
const items = envs.map(e => ({ value: e, label: e }));
const filtered = items.filter(i => i.value.startsWith(prefix));
return filtered.length > 0 ? filtered : null;
},
handler: async (args, ctx) => {
ctx.ui.notify(`Deploying to ${args}`, "info");
},
});
```
</argument_completions>
<subcommand_pattern>
Fake nested commands via first-argument parsing. Used by `/wt new|ls|switch|merge|rm`.
```typescript
pi.registerCommand("foo", {
description: "Manage foo items: /foo new|list|delete [name]",
getArgumentCompletions: (prefix: string) => {
const parts = prefix.trim().split(/\s+/);
// First arg: subcommand
if (parts.length <= 1) {
return ["new", "list", "delete"]
.filter(cmd => cmd.startsWith(parts[0] ?? ""))
.map(cmd => ({ value: cmd, label: cmd }));
}
// Second arg: depends on subcommand
if (parts[0] === "delete") {
const items = getItemsSomehow();
return items
.filter(name => name.startsWith(parts[1] ?? ""))
.map(name => ({ value: `delete ${name}`, label: name }));
}
return [];
},
handler: async (args, ctx) => {
const parts = args.trim().split(/\s+/);
const sub = parts[0];
switch (sub) {
case "new": /* ... */ return;
case "list": /* ... */ return;
case "delete": /* handle parts[1] */ return;
default:
ctx.ui.notify("Usage: /foo <new|list|delete> [name]", "info");
}
},
});
```
**Gotcha:** `"".trim().split(/\s+/)` produces `['']`, not `[]`. That's why `parts.length <= 1` handles both empty and partial first arg.
</subcommand_pattern>
<command_context>
Command handlers get `ExtensionCommandContext` which extends `ExtensionContext` with session control methods:
| Method | Purpose |
|--------|---------|
| `ctx.waitForIdle()` | Wait for agent to finish streaming |
| `ctx.newSession(options?)` | Create a new session |
| `ctx.fork(entryId)` | Fork from an entry |
| `ctx.navigateTree(targetId, options?)` | Navigate session tree |
| `ctx.reload()` | Hot-reload everything |
**⚠️ These methods are ONLY available in command handlers.** Calling them from event handlers causes deadlocks.
```typescript
pi.registerCommand("handoff", {
handler: async (args, ctx) => {
await ctx.waitForIdle();
await ctx.newSession({
setup: async (sm) => {
sm.appendMessage({
role: "user",
content: [{ type: "text", text: `Context: ${args}` }],
timestamp: Date.now(),
});
},
});
},
});
```
</command_context>
<reload_pattern>
Expose reload as both a command and a tool the LLM can call:
```typescript
pi.registerCommand("reload-runtime", {
description: "Reload extensions, skills, prompts, and themes",
handler: async (_args, ctx) => {
await ctx.reload();
return; // Treat reload as terminal
},
});
pi.registerTool({
name: "reload_runtime",
label: "Reload Runtime",
description: "Reload extensions, skills, prompts, and themes",
parameters: Type.Object({}),
async execute() {
pi.sendUserMessage("/reload-runtime", { deliverAs: "followUp" });
return { content: [{ type: "text", text: "Queued /reload-runtime as follow-up." }] };
},
});
```
</reload_pattern>

View file

@ -0,0 +1,108 @@
<overview>
Custom rendering for tools and messages — control how they appear in the TUI.
</overview>
<tool_rendering>
Tools can provide `renderCall` (how the call looks) and `renderResult` (how the result looks):
```typescript
import { Text } from "@singularity-forge/pi-tui";
import { keyHint } from "@singularity-forge/pi-coding-agent";
pi.registerTool({
name: "my_tool",
// ...
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("my_tool "));
text += theme.fg("muted", args.action);
if (args.text) text += " " + theme.fg("dim", `"${args.text}"`);
return new Text(text, 0, 0); // 0,0 padding — Box handles it
},
renderResult(result, { expanded, isPartial }, theme) {
// isPartial = true during streaming (onUpdate was called)
if (isPartial) {
return new Text(theme.fg("warning", "Processing..."), 0, 0);
}
// expanded = user toggled expand (Ctrl+O)
if (result.details?.error) {
return new Text(theme.fg("error", `Error: ${result.details.error}`), 0, 0);
}
let text = theme.fg("success", "✓ Done");
if (!expanded) {
text += ` (${keyHint("expandTools", "to expand")})`;
}
if (expanded && result.details?.items) {
for (const item of result.details.items) {
text += "\n " + theme.fg("dim", item);
}
}
return new Text(text, 0, 0);
},
});
```
If you omit `renderCall`/`renderResult`, the built-in renderer is used. Useful for tool overrides where you just wrap logic without reimplementing UI.
**Fallback:** If render methods throw, `renderCall` shows tool name, `renderResult` shows raw `content` text.
</tool_rendering>
<key_hints>
Key hint helpers for showing keybinding info in render output:
```typescript
import { keyHint, appKeyHint, editorKey, rawKeyHint } from "@singularity-forge/pi-coding-agent";
// Editor action hint (respects user keybinding config)
keyHint("expandTools", "to expand") // e.g., "Ctrl+O to expand"
keyHint("selectConfirm", "to select")
// Raw key hint (always shows literal key)
rawKeyHint("Ctrl+O", "to expand")
```
</key_hints>
<message_rendering>
Register a renderer for custom message types:
```typescript
import { Text } from "@singularity-forge/pi-tui";
pi.registerMessageRenderer("my-extension", (message, options, theme) => {
const { expanded } = options;
let text = theme.fg("accent", `[${message.customType}] `) + message.content;
if (expanded && message.details) {
text += "\n" + theme.fg("dim", JSON.stringify(message.details, null, 2));
}
return new Text(text, 0, 0);
});
// Send messages that use this renderer:
pi.sendMessage({
customType: "my-extension", // Matches renderer name
content: "Status update",
display: true,
details: { foo: "bar" },
});
```
</message_rendering>
<syntax_highlighting>
```typescript
import { highlightCode, getLanguageFromPath } from "@singularity-forge/pi-coding-agent";
const lang = getLanguageFromPath("/path/to/file.rs"); // "rust"
const highlighted = highlightCode(code, lang, theme);
```
</syntax_highlighting>
<best_practices>
- Return `Text` with padding `(0, 0)` — the wrapping `Box` handles padding
- Support `expanded` for detail on demand
- Handle `isPartial` for streaming progress
- Keep collapsed view compact
- Use `\n` for multi-line content within a single `Text`
</best_practices>

View file

@ -0,0 +1,183 @@
<overview>
Complete custom tools reference — registration, parameters, execution, output truncation, overrides, rendering, and dynamic registration.
</overview>
<registration>
```typescript
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
pi.registerTool({
name: "my_tool", // Unique identifier (snake_case)
label: "My Tool", // Display name in TUI
description: "What this does", // Full description shown to LLM
// Optional: one-liner for system prompt "Available tools" section
promptSnippet: "Manage project todo items",
// Optional: bullets added to system prompt "Guidelines" when tool is active
promptGuidelines: [
"Use my_tool for task management instead of file edits."
],
// Parameter schema (MUST use TypeBox)
parameters: Type.Object({
action: StringEnum(["list", "add", "remove"] as const),
text: Type.Optional(Type.String({ description: "Item text" })),
id: Type.Optional(Type.Number({ description: "Item ID" })),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
// 1. Check cancellation
if (signal?.aborted) {
return { content: [{ type: "text", text: "Cancelled" }] };
}
// 2. Stream progress (optional)
onUpdate?.({
content: [{ type: "text", text: "Working..." }],
details: { progress: 50 },
});
// 3. Do the work
const result = await doWork(params);
// 4. Return result
return {
content: [{ type: "text", text: "Result text for LLM" }], // Sent to LLM context
details: { data: result }, // For rendering & state
};
},
// Optional: custom TUI rendering
renderCall(args, theme) { ... },
renderResult(result, { expanded, isPartial }, theme) { ... },
});
```
</registration>
<critical_stringenum>
**⚠️ MUST use `StringEnum` for string enum parameters:**
```typescript
import { StringEnum } from "@singularity-forge/pi-ai";
// ✅ Correct — works with all providers including Google
action: StringEnum(["list", "add", "remove"] as const)
// ❌ BROKEN with Google's API
action: Type.Union([Type.Literal("list"), Type.Literal("add")])
```
</critical_stringenum>
<output_truncation>
Tools MUST truncate output to avoid context overflow. Built-in limit: 50KB / 2000 lines.
```typescript
import {
truncateHead, truncateTail, formatSize,
DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES,
} from "@singularity-forge/pi-coding-agent";
async execute(toolCallId, params, signal, onUpdate, ctx) {
const output = await runCommand();
const truncation = truncateHead(output, {
maxLines: DEFAULT_MAX_LINES,
maxBytes: DEFAULT_MAX_BYTES,
});
let result = truncation.content;
if (truncation.truncated) {
const tempFile = writeTempFile(output);
result += `\n\n[Output truncated: ${truncation.outputLines}/${truncation.totalLines} lines`;
result += ` (${formatSize(truncation.outputBytes)}/${formatSize(truncation.totalBytes)}).`;
result += ` Full output: ${tempFile}]`;
}
return { content: [{ type: "text", text: result }] };
}
```
Use `truncateHead` when beginning matters (search results, file reads). Use `truncateTail` when end matters (logs, command output).
</output_truncation>
<signaling_errors>
Throw to signal an error (sets `isError: true`). Returning a value never sets error flag.
```typescript
async execute(toolCallId, params) {
if (!isValid(params.input)) {
throw new Error(`Invalid input: ${params.input}`);
}
return { content: [{ type: "text", text: "OK" }], details: {} };
}
```
</signaling_errors>
<dynamic_registration>
Tools can be registered at any time — during load, in `session_start`, in command handlers. Available immediately without `/reload`.
```typescript
pi.on("session_start", async (_event, ctx) => {
pi.registerTool({ name: "dynamic_tool", ... });
});
```
Use `pi.setActiveTools(names)` to enable/disable tools at runtime.
</dynamic_registration>
<overriding_builtins>
Register a tool with the same name as a built-in (`read`, `bash`, `edit`, `write`, `grep`, `find`, `ls`) to override it. **Must match exact result shape including `details` type.**
```typescript
import { createReadTool } from "@singularity-forge/pi-coding-agent";
pi.registerTool({
name: "read",
label: "Read (Logged)",
description: "Read file contents with logging",
parameters: Type.Object({
path: Type.String(),
offset: Type.Optional(Type.Number()),
limit: Type.Optional(Type.Number()),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
console.log(`[AUDIT] Reading: ${params.path}`);
const builtIn = createReadTool(ctx.cwd);
return builtIn.execute(toolCallId, params, signal, onUpdate);
},
// Omit renderCall/renderResult to use built-in renderer
});
```
Start with no built-in tools: `sf --no-tools -e ./my-extension.ts`
</overriding_builtins>
<multiple_tools>
One extension can register multiple tools with shared state:
```typescript
export default function (pi: ExtensionAPI) {
let connection = null;
pi.registerTool({ name: "db_connect", ... });
pi.registerTool({ name: "db_query", ... });
pi.registerTool({ name: "db_close", ... });
pi.on("session_shutdown", async () => {
connection?.close();
});
}
```
</multiple_tools>
<path_normalization>
Some models add `@` prefix to path arguments. Strip it:
```typescript
async execute(toolCallId, params, signal, onUpdate, ctx) {
let path = params.path;
if (path.startsWith("@")) path = path.slice(1);
// ...
}
```
</path_normalization>

View file

@ -0,0 +1,490 @@
<overview>
Complete custom UI reference — dialogs, persistent elements, custom components, overlays, custom editors, built-in components, keyboard input, performance, theming, and common mistakes.
</overview>
<ui_architecture>
```
┌─────────────────────────────────────────────────┐
│ Custom Header (ctx.ui.setHeader) │
├─────────────────────────────────────────────────┤
│ Message Area │
│ - User/assistant messages │
│ - Tool calls ◄── renderCall/renderResult │
│ - Custom messages ◄── registerMessageRenderer │
├─────────────────────────────────────────────────┤
│ Widgets (above editor) ◄── ctx.ui.setWidget │
├─────────────────────────────────────────────────┤
│ Editor ◄── ctx.ui.custom() / setEditorComponent│
├─────────────────────────────────────────────────┤
│ Widgets (below editor) ◄── ctx.ui.setWidget │
├─────────────────────────────────────────────────┤
│ Footer ◄── ctx.ui.setFooter / setStatus │
└─────────────────────────────────────────────────┘
┌─────────────────────┐
│ Overlay (floating) │ ◄── ctx.ui.custom({ overlay })
└─────────────────────┘
```
**11 ways to get UI on screen:**
| Method | Blocks? | Replaces editor? |
|--------|---------|-------------------|
| `ctx.ui.select/confirm/input/editor` | Yes | Temporarily |
| `ctx.ui.notify` | No | No |
| `ctx.ui.setStatus` | No | No (footer) |
| `ctx.ui.setWidget` | No | No |
| `ctx.ui.setFooter` | No | No (replaces footer) |
| `ctx.ui.setHeader` | No | No (replaces header) |
| `ctx.ui.custom()` | Yes | Temporarily |
| `ctx.ui.custom({overlay})` | Yes | No (renders on top) |
| `ctx.ui.setEditorComponent` | No | Yes (permanently) |
| `renderCall/renderResult` | No | No (inline in messages) |
| `registerMessageRenderer` | No | No (inline in messages) |
</ui_architecture>
<component_interface>
Every visual element implements:
```typescript
interface Component {
render(width: number): string[]; // Required — each line ≤ width visible chars
handleInput?(data: string): void; // Optional — receive keyboard input
wantsKeyRelease?: boolean; // Optional — receive key release events (Kitty protocol)
invalidate(): void; // Required — clear cached render state
}
```
**Render contract:**
- Return array of strings, one per line
- Each string MUST NOT exceed `width` in visible characters
- ANSI escape codes don't count toward visible width
- **Styles are reset at end of each line** — reapply per line
- Return `[]` for zero-height component
**Invalidation contract:**
- Clear ALL cached render output
- Clear any pre-baked themed strings
- Call `super.invalidate()` if extending a built-in component
</component_interface>
<dialogs>
Blocking dialog methods on `ctx.ui`:
```typescript
const choice = await ctx.ui.select("Pick one:", ["A", "B", "C"]); // string | undefined
const ok = await ctx.ui.confirm("Delete?", "This cannot be undone"); // boolean
const name = await ctx.ui.input("Name:", "placeholder"); // string | undefined
const text = await ctx.ui.editor("Edit:", "prefilled text"); // string | undefined
// Timed auto-dismiss with countdown
const ok = await ctx.ui.confirm("Proceed?", "Auto-continues in 5s", { timeout: 5000 });
// Returns false on timeout, undefined for select/input
// Manual dismissal with AbortSignal (distinguish timeout from cancel)
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 5000);
const ok = await ctx.ui.confirm("Timed", "Auto-cancels in 5s", { signal: controller.signal });
clearTimeout(timeoutId);
if (controller.signal.aborted) { /* timed out */ }
```
</dialogs>
<persistent_ui>
```typescript
// Footer status (multiple extensions can set independent entries)
ctx.ui.setStatus("my-ext", "● Active");
ctx.ui.setStatus("my-ext", undefined); // Clear
// Widgets
ctx.ui.setWidget("my-id", ["Line 1", "Line 2"]); // Above editor
ctx.ui.setWidget("my-id", ["Below"], { placement: "belowEditor" }); // Below editor
ctx.ui.setWidget("my-id", (_tui, theme) => ({ // Themed
render: () => [theme.fg("accent", "Styled")],
invalidate: () => {},
}));
ctx.ui.setWidget("my-id", undefined); // Clear
// Working message during streaming
ctx.ui.setWorkingMessage("Analyzing code...");
ctx.ui.setWorkingMessage(); // Restore default
// Custom footer (full replacement)
ctx.ui.setFooter((tui, theme, footerData) => ({
render(width) {
const branch = footerData.getGitBranch(); // Only available here
const statuses = footerData.getExtensionStatuses(); // All setStatus values
return [truncateToWidth(`${branch} | model`, width)];
},
invalidate() {},
dispose: footerData.onBranchChange(() => tui.requestRender()), // Reactive
}));
ctx.ui.setFooter(undefined); // Restore default
// Custom header
ctx.ui.setHeader((tui, theme) => ({
render(width) { return [theme.fg("accent", theme.bold("My Header"))]; },
invalidate() {},
}));
// Editor control
ctx.ui.setEditorText("Prefill");
const current = ctx.ui.getEditorText();
ctx.ui.pasteToEditor("pasted content"); // Triggers paste handling
// Tool expansion
ctx.ui.setToolsExpanded(true);
const expanded = ctx.ui.getToolsExpanded();
// Theme management
const themes = ctx.ui.getAllThemes();
ctx.ui.setTheme("light");
ctx.ui.theme.fg("accent", "text"); // Access current theme
```
</persistent_ui>
<custom_components>
`ctx.ui.custom()` temporarily replaces the editor. Returns a value when `done()` is called.
**Factory callback args:**
| Argument | Type | Purpose |
|----------|------|---------|
| `tui` | `TUI` | `tui.requestRender()` triggers re-render after state changes |
| `theme` | `Theme` | Current theme for styling |
| `keybindings` | `KeybindingsManager` | App keybinding config |
| `done` | `(value: T) => void` | Close component and return value |
**Inline pattern:**
```typescript
const result = await ctx.ui.custom<string | null>((tui, theme, keybindings, done) => ({
render(width: number): string[] {
return [truncateToWidth("Press Enter to confirm, Escape to cancel", width)];
},
handleInput(data: string) {
if (matchesKey(data, Key.enter)) done("confirmed");
if (matchesKey(data, Key.escape)) done(null);
},
invalidate() {},
}));
```
**Class-based pattern (recommended for complex UI):**
```typescript
class MyComponent {
private selected = 0;
private cachedWidth?: number;
private cachedLines?: string[];
constructor(
private tui: { requestRender: () => void },
private theme: Theme,
private items: string[],
private done: (value: string | null) => void,
) {}
handleInput(data: string) {
if (matchesKey(data, Key.up) && this.selected > 0) this.selected--;
else if (matchesKey(data, Key.down) && this.selected < this.items.length - 1) this.selected++;
else if (matchesKey(data, Key.enter)) { this.done(this.items[this.selected]); return; }
else if (matchesKey(data, Key.escape)) { this.done(null); return; }
else return;
this.invalidate();
this.tui.requestRender();
}
render(width: number): string[] {
if (this.cachedLines && this.cachedWidth === width) return this.cachedLines;
this.cachedLines = this.items.map((item, i) =>
truncateToWidth((i === this.selected ? "> " : " ") + item, width)
);
this.cachedWidth = width;
return this.cachedLines;
}
invalidate() { this.cachedWidth = undefined; this.cachedLines = undefined; }
}
const result = await ctx.ui.custom<string | null>((tui, theme, _kb, done) =>
new MyComponent(tui, theme, ["A", "B", "C"], done)
);
```
**Composing with built-in components:**
```typescript
const result = await ctx.ui.custom<string | null>((tui, theme, _kb, done) => {
const container = new Container();
container.addChild(new DynamicBorder((s: string) => theme.fg("accent", s)));
container.addChild(new Text(theme.fg("accent", theme.bold("Title")), 1, 0));
const selectList = new SelectList(items, 10, {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
});
selectList.onSelect = (item) => done(item.value);
selectList.onCancel = () => done(null);
container.addChild(selectList);
return {
render: (w) => container.render(w),
invalidate: () => container.invalidate(),
handleInput: (data) => { selectList.handleInput(data); tui.requestRender(); },
};
});
```
</custom_components>
<overlays>
Floating modals rendered on top of everything:
```typescript
const result = await ctx.ui.custom<string | null>(
(tui, theme, _kb, done) => new MyDialog({ onClose: done }),
{
overlay: true,
overlayOptions: {
anchor: "center", // 9 positions (see below)
width: "50%", // number = columns, string = percentage
minWidth: 40,
maxHeight: "80%",
margin: 2, // All sides, or { top, right, bottom, left }
offsetX: 0, offsetY: 0, // Fine-tune position
visible: (w, h) => w >= 80, // Hide on narrow terminals
},
onHandle: (handle) => {
// handle.setHidden(true/false) — temporarily hide
// handle.hide() — permanently remove
},
}
);
```
**Anchor positions:**
```
top-left top-center top-right
left-center center right-center
bottom-left bottom-center bottom-right
```
**Stacked overlays:** Multiple overlays stack (newest on top). Closing one gives focus to the one below.
**⚠️ Overlay lifecycle:** Components are disposed when closed. Never reuse references — create fresh instances each time.
</overlays>
<custom_editor>
Replace the main input editor permanently:
```typescript
import { CustomEditor } from "@singularity-forge/pi-coding-agent";
class VimEditor extends CustomEditor {
private mode: "normal" | "insert" = "insert";
handleInput(data: string): void {
if (matchesKey(data, "escape") && this.mode === "insert") {
this.mode = "normal"; return;
}
if (this.mode === "insert") { super.handleInput(data); return; }
switch (data) {
case "i": this.mode = "insert"; return;
case "h": super.handleInput("\x1b[D"); return; // Left
case "j": super.handleInput("\x1b[B"); return; // Down
case "k": super.handleInput("\x1b[A"); return; // Up
case "l": super.handleInput("\x1b[C"); return; // Right
}
if (data.length === 1 && data.charCodeAt(0) >= 32) return; // Block printable in normal
super.handleInput(data);
}
}
ctx.ui.setEditorComponent((_tui, theme, keybindings) => new VimEditor(theme, keybindings));
ctx.ui.setEditorComponent(undefined); // Restore default
```
**Critical:** Extend `CustomEditor` (NOT `Editor`) to get app keybindings (escape to abort, ctrl+d, model switching).
</custom_editor>
<built_in_components>
**From `@singularity-forge/pi-tui`:**
| Component | Constructor | Purpose |
|-----------|-------------|---------|
| `Text` | `new Text(content, paddingX, paddingY, bgFn?)` | Multi-line text with word wrap |
| `Box` | `new Box(paddingX, paddingY, bgFn)` | Container with padding+background, `.addChild()` |
| `Container` | `new Container()` | Vertical stack, `.addChild()`, `.removeChild()`, `.clear()` |
| `Spacer` | `new Spacer(lines)` | Empty vertical space |
| `Markdown` | `new Markdown(content, padX, padY, getMarkdownTheme())` | Rendered markdown with syntax highlighting |
| `Image` | `new Image(base64, mimeType, theme, opts?)` | Image rendering (Kitty, iTerm2) |
| `SelectList` | `new SelectList(items, maxVisible, themeOpts)` | Interactive selection with search and scrolling |
| `SettingsList` | `new SettingsList(items, maxVisible, theme, onChange, onClose, opts?)` | Toggle settings with left/right arrows |
| `Input` | `new Input()` | Text input field |
| `Editor` | `new Editor(tui, editorTheme)` | Multi-line editor with undo |
**SelectList usage:**
```typescript
const items: SelectItem[] = [
{ value: "opt1", label: "Option 1", description: "First option" },
{ value: "opt2", label: "Option 2" },
];
const selectList = new SelectList(items, 10, {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
});
selectList.onSelect = (item) => { /* item.value */ };
selectList.onCancel = () => { /* escape pressed */ };
```
**SettingsList usage:**
```typescript
const items: SettingItem[] = [
{ id: "verbose", label: "Verbose mode", currentValue: "off", values: ["on", "off"] },
{ id: "theme", label: "Theme", currentValue: "dark", values: ["dark", "light", "auto"] },
];
const settings = new SettingsList(items, 15, getSettingsListTheme(),
(id, newValue) => { /* setting changed */ },
() => { /* close requested */ },
{ enableSearch: true },
);
```
**From `@singularity-forge/pi-coding-agent`:**
| Component | Constructor | Purpose |
|-----------|-------------|---------|
| `DynamicBorder` | `new DynamicBorder((s: string) => theme.fg("accent", s))` | Border line |
| `BorderedLoader` | — | Spinner with cancel support |
| `CustomEditor` | `new CustomEditor(theme, keybindings)` | Base class for custom editors |
</built_in_components>
<keyboard_input>
```typescript
import { matchesKey, Key } from "@singularity-forge/pi-tui";
handleInput(data: string) {
// Basic keys
if (matchesKey(data, Key.up)) {}
if (matchesKey(data, Key.down)) {}
if (matchesKey(data, Key.enter)) {}
if (matchesKey(data, Key.escape)) {}
if (matchesKey(data, Key.tab)) {}
if (matchesKey(data, Key.space)) {}
if (matchesKey(data, Key.backspace)) {}
if (matchesKey(data, Key.home)) {}
if (matchesKey(data, Key.end)) {}
// With modifiers
if (matchesKey(data, Key.ctrl("c"))) {}
if (matchesKey(data, Key.shift("tab"))) {}
if (matchesKey(data, Key.alt("left"))) {}
if (matchesKey(data, Key.ctrlShift("p"))) {}
// String format also works: "enter", "ctrl+c", "shift+tab"
// Printable character detection
if (data.length === 1 && data.charCodeAt(0) >= 32) {
// Letter, number, symbol
}
}
```
**handleInput contract:**
1. Check for your keys
2. Update state
3. Call `this.invalidate()` if render output changes
4. Call `tui.requestRender()` to trigger re-render
</keyboard_input>
<line_width_rule>
**Cardinal rule: each line from render() must not exceed `width` visible characters.**
```typescript
import { visibleWidth, truncateToWidth, wrapTextWithAnsi } from "@singularity-forge/pi-tui";
visibleWidth("\x1b[32mHello\x1b[0m"); // Returns 5 (ignores ANSI codes)
truncateToWidth("Very long text here", 10); // "Very lo..."
truncateToWidth("Very long text here", 10, ""); // "Very long " (no ellipsis)
wrapTextWithAnsi("\x1b[32mLong green text\x1b[0m", 10); // Word wrap preserving ANSI
```
If lines exceed `width`, terminal wraps cause visual corruption.
</line_width_rule>
<performance_caching>
Always cache render output:
```typescript
class CachedComponent {
private cachedWidth?: number;
private cachedLines?: string[];
render(width: number): string[] {
if (this.cachedLines && this.cachedWidth === width) return this.cachedLines;
const lines = this.computeLines(width);
this.cachedWidth = width;
this.cachedLines = lines;
return lines;
}
invalidate() { this.cachedWidth = undefined; this.cachedLines = undefined; }
}
```
**Update cycle:** State changes → `invalidate()``tui.requestRender()``render(width)` called
**Game loop pattern** (real-time updates):
```typescript
this.interval = setInterval(() => {
this.tick();
this.version++;
this.tui.requestRender();
}, 100); // 10 FPS
// Clean up in dispose()
clearInterval(this.interval);
```
</performance_caching>
<theme_colors>
Always use theme from callback params, never import directly.
**All foreground colors:**
| Category | Colors |
|----------|--------|
| General | `text`, `accent`, `muted`, `dim` |
| Status | `success`, `error`, `warning` |
| Borders | `border`, `borderAccent`, `borderMuted` |
| Messages | `userMessageText`, `customMessageText`, `customMessageLabel` |
| Tools | `toolTitle`, `toolOutput` |
| Diffs | `toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext` |
| Markdown | `mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet` |
| Syntax | `syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation` |
| Thinking | `thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh` |
**All background colors:** `selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`
**Syntax highlighting:**
```typescript
import { highlightCode, getLanguageFromPath } from "@singularity-forge/pi-coding-agent";
const lang = getLanguageFromPath("/file.rs"); // "rust"
const highlighted = highlightCode(code, lang, theme);
```
</theme_colors>
<common_mistakes>
1. **Lines exceed width** → Visual corruption. Use `truncateToWidth()` on every line.
2. **Forgetting `tui.requestRender()`** → UI doesn't update. Call after invalidate().
3. **Importing theme directly** → Wrong colors after theme switch. Use theme from callback.
4. **Not typing DynamicBorder param**`new DynamicBorder((s: string) => theme.fg("accent", s))`.
5. **Reusing disposed overlay components** → Create fresh instances each time.
6. **Styles bleeding across lines** → TUI resets per line. Reapply styles, or use `wrapTextWithAnsi()`.
7. **Not implementing invalidate()** → Theme changes don't take effect.
8. **Forgetting super.invalidate()**`override invalidate() { super.invalidate(); /* cleanup */ }`
9. **Timer not cleaned up** → Call `clearInterval` before `done()`.
10. **Using ctx.ui in non-interactive mode** → Check `ctx.hasUI` first.
</common_mistakes>

View file

@ -0,0 +1,126 @@
<overview>
Complete event reference with handler signatures, return types, and type narrowing utilities.
</overview>
<event_categories>
**Session events:** `session_start`, `session_before_switch`, `session_switch`, `session_before_fork`, `session_fork`, `session_before_compact`, `session_compact`, `session_before_tree`, `session_tree`, `session_shutdown`
**Agent events:** `before_agent_start`, `agent_start`, `agent_end`, `turn_start`, `turn_end`, `context`, `before_provider_request`, `message_start`, `message_update`, `message_end`
**Tool events:** `tool_call`, `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `tool_result`
**Input events:** `input`
**Model events:** `model_select`
**User bash events:** `user_bash`
**Special:** `session_directory` (CLI startup only, no `ctx` — receives only event)
</event_categories>
<handler_signature>
```typescript
pi.on("event_name", async (event, ctx: ExtensionContext) => {
// event — typed payload for this event
// ctx — access to UI, session, model, control flow
// Return undefined for no action, or a typed response
});
```
</handler_signature>
<key_events>
**before_agent_start** — Fired after user prompt, before agent loop. Primary hook for context injection and system prompt modification.
```typescript
pi.on("before_agent_start", async (event, ctx) => {
// event.prompt — user's prompt text
// event.images — attached images
// event.systemPrompt — current system prompt
return {
message: { customType: "my-ext", content: "Extra context", display: true },
systemPrompt: event.systemPrompt + "\n\nExtra instructions...",
};
});
```
**tool_call** — Fired before tool executes. Can block.
```typescript
import { isToolCallEventType } from "@singularity-forge/pi-coding-agent";
pi.on("tool_call", async (event, ctx) => {
if (isToolCallEventType("bash", event)) {
// event.input is typed as { command: string; timeout?: number }
if (event.input.command.includes("rm -rf")) {
return { block: true, reason: "Dangerous command" };
}
}
});
```
**tool_result** — Fired after tool executes. Can modify result. Handlers chain like middleware.
```typescript
import { isToolResultEventType } from "@singularity-forge/pi-coding-agent";
pi.on("tool_result", async (event, ctx) => {
if (isToolResultEventType("bash", event)) {
// event.details is typed as BashToolDetails
}
// Return partial patch: { content, details, isError }
// Omitted fields keep current values
});
```
**context** — Fired before each LLM call. Modify messages non-destructively.
```typescript
pi.on("context", async (event, ctx) => {
// event.messages is a deep copy — safe to modify
const filtered = event.messages.filter(m => !shouldPrune(m));
return { messages: filtered };
});
```
**input** — Fired when user input is received, before skill/template expansion.
```typescript
pi.on("input", async (event, ctx) => {
// event.text — raw input
// event.source — "interactive", "rpc", or "extension"
if (event.text.startsWith("?quick "))
return { action: "transform", text: `Respond briefly: ${event.text.slice(7)}` };
return { action: "continue" };
});
```
**model_select** — Fired when model changes.
```typescript
pi.on("model_select", async (event, ctx) => {
// event.model, event.previousModel, event.source ("set"|"cycle"|"restore")
});
```
</key_events>
<type_narrowing>
Built-in type guards for tool events:
```typescript
import { isToolCallEventType, isToolResultEventType } from "@singularity-forge/pi-coding-agent";
// Tool calls — narrows event.input type
if (isToolCallEventType("bash", event)) { /* event.input: { command, timeout? } */ }
if (isToolCallEventType("read", event)) { /* event.input: { path, offset?, limit? } */ }
if (isToolCallEventType("write", event)) { /* event.input: { path, content } */ }
if (isToolCallEventType("edit", event)) { /* event.input: { path, oldText, newText } */ }
// Tool results — narrows event.details type
if (isToolResultEventType("bash", event)) { /* event.details: BashToolDetails */ }
```
For custom tools, export your input type and use explicit type params:
```typescript
if (isToolCallEventType<"my_tool", MyToolInput>("my_tool", event)) {
event.input.action; // typed
}
```
</type_narrowing>

View file

@ -0,0 +1,64 @@
<overview>
The extension lifecycle from load to shutdown, including the full event flow.
</overview>
<loading>
Extensions load when SF starts (or on `/reload`). The default export function runs synchronously — subscribe to events and register tools/commands during this call.
```
SF starts
└─► Extension default function runs
├── pi.on("event", handler) ← Subscribe
├── pi.registerTool({...}) ← Register tools
├── pi.registerCommand(...) ← Register commands
└── pi.registerShortcut(...) ← Register shortcuts
└─► session_start fires
```
</loading>
<event_flow>
Full event flow per user prompt:
```
user sends prompt
├─► Extension commands checked (bypass if match)
├─► input event (can intercept/transform/handle)
├─► Skill/template expansion
├─► before_agent_start (inject message, modify system prompt)
├─► agent_start
│ ┌── Turn loop (repeats while LLM calls tools) ──┐
│ │ turn_start │
│ │ context (can modify messages sent to LLM) │
│ │ before_provider_request (inspect/replace payload)│
│ │ LLM responds → may call tools: │
│ │ tool_call (can BLOCK) │
│ │ tool_execution_start/update/end │
│ │ tool_result (can MODIFY) │
│ │ turn_end │
│ └────────────────────────────────────────────────┘
└─► agent_end
```
</event_flow>
<session_events>
| Event | When | Can Return |
|-------|------|------------|
| `session_start` | Session loads | — |
| `session_before_switch` | Before `/new` or `/resume` | `{ cancel: true }` |
| `session_switch` | After switch | — |
| `session_before_fork` | Before `/fork` | `{ cancel: true }`, `{ skipConversationRestore: true }` |
| `session_fork` | After fork | — |
| `session_before_compact` | Before compaction | `{ cancel: true }`, `{ compaction: {...} }` |
| `session_compact` | After compaction | — |
| `session_shutdown` | On exit | — |
</session_events>
<hot_reload>
Extensions in auto-discovered locations hot-reload with `/reload`:
- `session_shutdown` fires for old runtime
- Resources re-scanned
- `session_start` fires for new runtime
- Code after `await ctx.reload()` still runs from the pre-reload version — treat as terminal
</hot_reload>

View file

@ -0,0 +1,75 @@
<overview>
ExtensionAPI methods — the `pi` object received in the default export function.
</overview>
<core_registration>
| Method | Purpose |
|--------|---------|
| `pi.on(event, handler)` | Subscribe to events |
| `pi.registerTool(definition)` | Register LLM-callable tool |
| `pi.registerCommand(name, options)` | Register `/command` |
| `pi.registerShortcut(key, options)` | Register keyboard shortcut |
| `pi.registerFlag(name, options)` | Register CLI flag |
| `pi.registerMessageRenderer(customType, renderer)` | Custom message rendering |
| `pi.registerProvider(name, config)` | Register/override model provider |
| `pi.unregisterProvider(name)` | Remove a provider |
</core_registration>
<messaging>
| Method | Purpose |
|--------|---------|
| `pi.sendMessage(message, options?)` | Inject custom message into session |
| `pi.sendUserMessage(content, options?)` | Send user message (triggers turn) |
**Delivery modes for `sendMessage`:**
- `"steer"` (default) — Interrupts streaming after current tool
- `"followUp"` — Waits for agent to finish all tools
- `"nextTurn"` — Queued for next user prompt
```typescript
pi.sendMessage({
customType: "my-extension",
content: "Additional context",
display: true,
details: { ... },
}, { deliverAs: "steer", triggerTurn: true });
```
</messaging>
<state_session>
| Method | Purpose |
|--------|---------|
| `pi.appendEntry(customType, data?)` | Persist state (NOT sent to LLM) |
| `pi.setSessionName(name)` | Set session display name |
| `pi.getSessionName()` | Get session name |
| `pi.setLabel(entryId, label)` | Bookmark entry for `/tree` |
</state_session>
<tool_management>
```typescript
const active = pi.getActiveTools(); // ["read", "bash", "edit", "write"]
const all = pi.getAllTools(); // [{ name, description }, ...]
pi.setActiveTools(["read", "bash"]); // Enable/disable tools
```
</tool_management>
<model_management>
```typescript
const model = ctx.modelRegistry.find("anthropic", "claude-sonnet-4-5");
if (model) {
const success = await pi.setModel(model); // Returns false if no API key
}
pi.getThinkingLevel(); // "off" | "minimal" | "low" | "medium" | "high" | "xhigh"
pi.setThinkingLevel("high");
```
</model_management>
<utilities>
| Method | Purpose |
|--------|---------|
| `pi.exec(cmd, args, opts?)` | Shell command (prefer over child_process) |
| `pi.events` | Shared event bus for inter-extension communication |
| `pi.getFlag(name)` | Get CLI flag value |
| `pi.getCommands()` | All available slash commands |
</utilities>

View file

@ -0,0 +1,53 @@
<overview>
ExtensionContext (`ctx`) — available in all event handlers (except `session_directory`).
</overview>
<ui_methods>
**Dialogs (blocking — wait for user response):**
```typescript
const choice = await ctx.ui.select("Pick one:", ["A", "B", "C"]);
const ok = await ctx.ui.confirm("Delete?", "This cannot be undone");
const name = await ctx.ui.input("Name:", "placeholder");
const text = await ctx.ui.editor("Edit:", "prefilled text");
// Timed dialog — auto-dismiss after timeout
const ok = await ctx.ui.confirm("Auto-confirm?", "Proceeds in 5s", { timeout: 5000 });
```
**Non-blocking UI:**
```typescript
ctx.ui.notify("Done!", "info"); // Toast: "info" | "warning" | "error"
ctx.ui.setStatus("my-ext", "● Active"); // Footer status
ctx.ui.setStatus("my-ext", undefined); // Clear
ctx.ui.setWidget("my-id", ["Line 1", "Line 2"]); // Widget above editor
ctx.ui.setWidget("my-id", ["Below!"], { placement: "belowEditor" });
ctx.ui.setTitle("sf - my project"); // Terminal title
ctx.ui.setEditorText("Prefill"); // Set editor content
ctx.ui.setWorkingMessage("Analyzing..."); // Working message during streaming
ctx.ui.setToolsExpanded(true); // Expand tool output
```
</ui_methods>
<ctx_properties>
| Property/Method | Purpose |
|----------------|---------|
| `ctx.hasUI` | `false` in print/JSON mode — check before dialogs |
| `ctx.cwd` | Current working directory |
| `ctx.sessionManager` | Read-only session state |
| `ctx.modelRegistry` / `ctx.model` | Model access |
| `ctx.isIdle()` / `ctx.abort()` / `ctx.hasPendingMessages()` | Agent state |
| `ctx.shutdown()` | Request graceful exit (deferred until idle) |
| `ctx.getContextUsage()` | Current context token usage |
| `ctx.compact(options?)` | Trigger compaction |
| `ctx.getSystemPrompt()` | Current effective system prompt |
</ctx_properties>
<session_manager>
```typescript
ctx.sessionManager.getEntries() // All entries
ctx.sessionManager.getBranch() // Current branch
ctx.sessionManager.getLeafId() // Current leaf entry ID
ctx.sessionManager.getSessionFile() // Session JSONL path
ctx.sessionManager.getLabel(entryId) // Entry label
```
</session_manager>

View file

@ -0,0 +1,37 @@
<overview>
Non-negotiable rules and common gotchas when building SF extensions.
</overview>
<must_follow>
1. **Use `StringEnum` for string enums**`Type.Union`/`Type.Literal` breaks Google's API.
2. **Truncate tool output** — Large output causes context overflow, compaction failures, degraded performance. Limit: 50KB / 2000 lines.
3. **Use theme from callback** — Don't import theme directly. Use the `theme` parameter from `ctx.ui.custom()` or render functions.
4. **`DynamicBorder` color param** — Type as `(s: string) => theme.fg("accent", s)`.
5. **Call `tui.requestRender()` after state changes** in `handleInput`.
6. **Return `{ render, invalidate, handleInput }`** from custom components.
7. **Lines must not exceed `width`** in `render()` — use `truncateToWidth()`.
8. **Session control methods ONLY in commands**`waitForIdle()`, `newSession()`, `fork()`, `navigateTree()`, `reload()` will **deadlock** in event handlers.
9. **Strip leading `@` from path arguments** — some models add it.
10. **Store state in tool result `details`** for proper branching support.
</must_follow>
<common_patterns>
- Rebuild component on `invalidate()` when pre-baking theme colors
- Check `signal?.aborted` in long-running tool executions
- Use `pi.exec()` instead of `child_process` for shell commands
- Overlay components are **disposed when closed** — create fresh instances each time
- Treat `ctx.reload()` as terminal — code after runs from pre-reload version
- Check `ctx.hasUI` before dialog methods (false in print/JSON mode)
- Extension errors are logged but don't crash SF — tool_call handler errors fail-safe (block the tool)
</common_patterns>
<_sf_paths>
**SF extension paths (community/user-installed extensions):**
- Global: `~/.sf/extensions/*.ts`
- Global (subdir): `~/.sf/extensions/*/index.ts`
- Project-local: `.sf/extensions/*.ts`
- Project-local (subdir): `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package.
Community extensions placed there are silently ignored by the loader.
</_sf_paths>

View file

@ -0,0 +1,32 @@
<overview>
Mode behavior determines which UI methods work. Extensions may run in non-interactive modes where dialogs are unavailable.
</overview>
<mode_table>
| Mode | UI Methods | Notes |
|------|-----------|-------|
| **Interactive** (default) | Full TUI | Normal operation — all UI works |
| **RPC** (`--mode rpc`) | JSON protocol | Host handles UI, dialogs work via sub-protocol |
| **JSON** (`--mode json`) | No-op | Event stream to stdout, no UI |
| **Print** (`-p`) | No-op | Extensions run but can't prompt users |
</mode_table>
<checking_ui>
**Always check `ctx.hasUI`** before calling dialog methods:
```typescript
if (ctx.hasUI) {
const ok = await ctx.ui.confirm("Delete?", "Sure?");
if (!ok) return;
} else {
// Default behavior for non-interactive mode
// Or just proceed without confirmation
}
```
`ctx.hasUI` is `false` in print mode (`-p`) and JSON mode. `true` in interactive and RPC mode.
</checking_ui>
<fire_and_forget>
Non-blocking methods (`notify`, `setStatus`, `setWidget`, `setTitle`, `setEditorText`) are safe in all modes — they're no-ops when no UI is available.
</fire_and_forget>

View file

@ -0,0 +1,89 @@
<overview>
Model and provider management — switching models, registering custom providers with OAuth, and reacting to model changes.
</overview>
<switching_models>
```typescript
const model = ctx.modelRegistry.find("anthropic", "claude-sonnet-4-5");
if (model) {
const success = await pi.setModel(model);
if (!success) ctx.ui.notify("No API key for this model", "error");
}
// Thinking level
pi.getThinkingLevel(); // "off" | "minimal" | "low" | "medium" | "high" | "xhigh"
pi.setThinkingLevel("high"); // Clamped to model capabilities
```
</switching_models>
<register_provider>
```typescript
pi.registerProvider("my-proxy", {
baseUrl: "https://proxy.example.com",
apiKey: "PROXY_API_KEY", // Env var name or literal
api: "anthropic-messages", // or "openai-completions", "openai-responses"
headers: { "X-Custom": "value" }, // Optional custom headers
authHeader: true, // Auto-add Authorization: Bearer header
models: [
{
id: "claude-sonnet-4-20250514",
name: "Claude 4 Sonnet (proxy)",
reasoning: false,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 16384,
}
],
});
// Override just baseUrl for an existing provider (keeps all models)
pi.registerProvider("anthropic", {
baseUrl: "https://proxy.example.com",
});
// Remove a provider (restores any overridden built-in models)
pi.unregisterProvider("my-proxy");
```
Takes effect immediately after initial load phase — no `/reload` required.
</register_provider>
<oauth_provider>
Register a provider with OAuth support for `/login`:
```typescript
pi.registerProvider("corporate-ai", {
baseUrl: "https://ai.corp.com",
api: "openai-responses",
models: [/* ... */],
oauth: {
name: "Corporate AI (SSO)",
async login(callbacks) {
callbacks.onAuth({ url: "https://sso.corp.com/..." });
const code = await callbacks.onPrompt({ message: "Enter code:" });
return { refresh: code, access: code, expires: Date.now() + 3600000 };
},
async refreshToken(credentials) {
return credentials; // Refresh logic
},
getApiKey(credentials) {
return credentials.access;
},
},
});
```
</oauth_provider>
<model_events>
React to model changes:
```typescript
pi.on("model_select", async (event, ctx) => {
// event.model — newly selected model
// event.previousModel — previous model (undefined if first)
// event.source — "set" | "cycle" | "restore"
ctx.ui.setStatus("model", `${event.model.provider}/${event.model.id}`);
});
```
</model_events>

View file

@ -0,0 +1,55 @@
<overview>
Packaging extensions for distribution via npm, git, or local paths. Creating SF packages.
</overview>
<package_manifest>
Add a `pi` manifest to `package.json`:
```json
{
"name": "my-sf-package",
"keywords": ["pi-package"],
"pi": {
"extensions": ["./extensions"],
"skills": ["./skills"],
"prompts": ["./prompts"],
"themes": ["./themes"]
}
}
```
</package_manifest>
<installing>
```bash
sf install npm:@foo/bar@1.0.0
sf install git:github.com/user/repo@v1
sf install ./local/path
# Try without installing:
sf -e npm:@foo/bar
```
</installing>
<convention_directories>
If no `pi` manifest exists, auto-discovers:
- `extensions/``.ts` and `.js` files
- `skills/``SKILL.md` folders
- `prompts/``.md` files
- `themes/``.json` files
</convention_directories>
<dependencies>
- List `@singularity-forge/pi-ai`, `@singularity-forge/pi-coding-agent`, `@singularity-forge/pi-tui`, `@sinclair/typebox` in `peerDependencies` with `"*"` — they're bundled by the runtime.
- Other npm deps go in `dependencies`. The runtime runs `npm install` on package installation.
</dependencies>
<gallery_metadata>
```json
{
"pi": {
"video": "https://example.com/demo.mp4",
"image": "https://example.com/screenshot.png"
}
}
```
</gallery_metadata>

View file

@ -0,0 +1,90 @@
<overview>
Remote execution via pluggable operations, spawnHook for bash, and tool override patterns.
</overview>
<pluggable_operations>
Built-in tools support pluggable operations for SSH, containers, etc.:
```typescript
import { createReadTool, createBashTool, createWriteTool } from "@singularity-forge/pi-coding-agent";
// Create tool with custom remote operations
const remoteBash = createBashTool(cwd, {
operations: {
execute: (cmd) => sshExec(remote, cmd),
},
});
```
**Operations interfaces:** `ReadOperations`, `WriteOperations`, `EditOperations`, `BashOperations`, `LsOperations`, `GrepOperations`, `FindOperations`
</pluggable_operations>
<spawn_hook>
The bash tool supports a `spawnHook` to modify commands before execution:
```typescript
const bashTool = createBashTool(cwd, {
spawnHook: ({ command, cwd, env }) => ({
command: `source ~/.profile\n${command}`,
cwd: `/mnt/sandbox${cwd}`,
env: { ...env, CI: "1" },
}),
});
```
</spawn_hook>
<ssh_pattern>
Full SSH pattern with flag-based switching:
```typescript
import { createBashTool, type ExtensionAPI } from "@singularity-forge/pi-coding-agent";
export default function (pi: ExtensionAPI) {
pi.registerFlag("ssh", { description: "SSH target", type: "string" });
const localBash = createBashTool(process.cwd());
pi.registerTool({
...localBash,
async execute(id, params, signal, onUpdate, ctx) {
const sshTarget = pi.getFlag("--ssh");
if (sshTarget) {
const remoteBash = createBashTool(process.cwd(), {
operations: createSSHOperations(sshTarget),
});
return remoteBash.execute(id, params, signal, onUpdate);
}
return localBash.execute(id, params, signal, onUpdate);
},
});
}
```
</ssh_pattern>
<tool_override_pattern>
Override built-in tools for logging/access control — omit renderCall/renderResult to keep built-in rendering:
```typescript
import { createReadTool } from "@singularity-forge/pi-coding-agent";
import { Type } from "@sinclair/typebox";
pi.registerTool({
name: "read", // Same name = overrides built-in
label: "Read (Logged)",
description: "Read file contents with logging",
parameters: Type.Object({
path: Type.String(),
offset: Type.Optional(Type.Number()),
limit: Type.Optional(Type.Number()),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
console.log(`[AUDIT] Reading: ${params.path}`);
const builtIn = createReadTool(ctx.cwd);
return builtIn.execute(toolCallId, params, signal, onUpdate);
},
// Omit renderCall/renderResult → built-in renderer used automatically
});
```
**Must match exact result shape** including `details` type.
</tool_override_pattern>

View file

@ -0,0 +1,70 @@
<overview>
State management patterns for extensions — tool result details (branch-safe) and appendEntry (private).
</overview>
<tool_result_details>
**Recommended for stateful tools.** State in `details` works correctly with branching/forking.
```typescript
export default function (pi: ExtensionAPI) {
let items: string[] = [];
// Reconstruct state from session on load
pi.on("session_start", async (_event, ctx) => reconstructState(ctx));
pi.on("session_switch", async (_event, ctx) => reconstructState(ctx));
pi.on("session_fork", async (_event, ctx) => reconstructState(ctx));
pi.on("session_tree", async (_event, ctx) => reconstructState(ctx));
const reconstructState = (ctx: ExtensionContext) => {
items = [];
for (const entry of ctx.sessionManager.getBranch()) {
if (entry.type === "message" && entry.message.role === "toolResult") {
if (entry.message.toolName === "my_tool") {
items = entry.message.details?.items ?? [];
}
}
}
};
pi.registerTool({
name: "my_tool",
// ...
async execute(toolCallId, params, signal, onUpdate, ctx) {
items.push(params.text);
return {
content: [{ type: "text", text: "Added" }],
details: { items: [...items] }, // ← Snapshot full state
};
},
});
}
```
**Key:** Reconstruct on ALL session change events: `session_start`, `session_switch`, `session_fork`, `session_tree`.
</tool_result_details>
<append_entry>
**For extension-private state** that doesn't participate in LLM context but needs to survive restarts:
```typescript
// Save
pi.appendEntry("my-state", { count: 42, lastRun: Date.now() });
// Restore
pi.on("session_start", async (_event, ctx) => {
for (const entry of ctx.sessionManager.getEntries()) {
if (entry.type === "custom" && entry.customType === "my-state") {
const data = entry.data; // { count: 42, lastRun: ... }
}
}
});
```
</append_entry>
<when_to_use_which>
| Pattern | Use When |
|---------|----------|
| Tool result `details` | State the LLM's tools produce (todo items, connection state, query results) |
| `pi.appendEntry()` | Extension-private config, timestamps, counters the LLM doesn't need |
| File on disk | Large data, config files, caches that shouldn't be in session |
</when_to_use_which>

View file

@ -0,0 +1,52 @@
<overview>
System prompt modification — per-turn injection, context manipulation, and tool-specific prompt content.
</overview>
<per_turn_modification>
Use `before_agent_start` to inject messages and/or modify the system prompt for each turn:
```typescript
pi.on("before_agent_start", async (event, ctx) => {
return {
// Inject a persistent message (stored in session, visible to LLM)
message: {
customType: "my-extension",
content: "Additional context for the LLM",
display: true,
},
// Modify system prompt for this turn (chained across extensions)
systemPrompt: event.systemPrompt + "\n\nYou must respond only in haiku.",
};
});
```
</per_turn_modification>
<context_manipulation>
Use the `context` event to modify messages before each LLM call:
```typescript
pi.on("context", async (event, ctx) => {
// event.messages is a deep copy — safe to modify
const filtered = event.messages.filter(m => !isIrrelevant(m));
return { messages: filtered };
});
```
</context_manipulation>
<tool_specific_prompts>
Tools can add content to the system prompt when active:
```typescript
pi.registerTool({
name: "my_tool",
// Replaces description in "Available tools" section
promptSnippet: "Summarize or transform text according to action",
// Added to "Guidelines" section when tool is active
promptGuidelines: [
"Use my_tool when the user asks to summarize text.",
"Prefer my_tool over direct output for structured data."
],
// ...
});
```
</tool_specific_prompts>

View file

@ -0,0 +1,51 @@
/**
* {{EXTENSION_NAME}} {{DESCRIPTION}}
*
* Capabilities:
* {{CAPABILITIES_LIST}}
*/
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
export default function (pi: ExtensionAPI) {
  // --- Lifecycle events ---
  pi.on("session_start", async (_event, ctx) => {
    // Initialize state, restore from session, show status
  });

  // --- LLM-callable tools ---
  // Parameter schema: StringEnum (not Type.Union/Type.Literal) keeps the
  // schema compatible across providers.
  const parameters = Type.Object({
    action: StringEnum(["list", "add"] as const),
    text: Type.Optional(Type.String({ description: "Item text" })),
  });
  pi.registerTool({
    name: "{{tool_name}}",
    label: "{{Tool Label}}",
    description: "{{Tool description for LLM}}",
    parameters,
    async execute(toolCallId, params, signal, onUpdate, ctx) {
      // Respect cancellation before doing any work.
      if (signal?.aborted) {
        return { content: [{ type: "text", text: "Cancelled" }] };
      }
      // Do work here
      return {
        content: [{ type: "text", text: "Result for LLM" }],
        details: {},
      };
    },
  });

  // --- Slash commands ---
  pi.registerCommand("{{command_name}}", {
    description: "{{Command description}}",
    handler: async (args, ctx) => {
      ctx.ui.notify(`Running ${args}`, "info");
    },
  });
}

View file

@ -0,0 +1,143 @@
/**
* {{EXTENSION_NAME}} Stateful tool with persistence
*
* State is stored in tool result details for proper branching support.
*/
import type { ExtensionAPI, ExtensionContext } from "@singularity-forge/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
import { Text, truncateToWidth, matchesKey, Key } from "@singularity-forge/pi-tui";
// Template placeholders ({{ItemType}}, {{tool_name}}, ...) are substituted
// when the skeleton is instantiated; this file is not compiled as-is.

// Domain object tracked by the tool — one entry per stored item.
interface {{ItemType}} {
  id: number;
  // Add fields
}

// Snapshot stored in every tool result's `details`. Carrying the FULL
// state (not a delta) is what makes branch/fork replay work.
interface {{ToolDetails}} {
  action: string;
  items: {{ItemType}}[];
  nextId: number;
  error?: string;
}

export default function (pi: ExtensionAPI) {
  // In-memory working copy; the authoritative state lives in tool result
  // details and is rebuilt by reconstructState() on session changes.
  let items: {{ItemType}}[] = [];
  let nextId = 1;
  // Reconstruct state from session: replay the current branch and adopt
  // the snapshot from each {{tool_name}} tool result (last one wins).
  const reconstructState = (ctx: ExtensionContext) => {
    items = [];
    nextId = 1;
    for (const entry of ctx.sessionManager.getBranch()) {
      if (entry.type === "message" && entry.message.role === "toolResult") {
        if (entry.message.toolName === "{{tool_name}}") {
          const details = entry.message.details as {{ToolDetails}} | undefined;
          if (details) {
            items = details.items;
            nextId = details.nextId;
          }
        }
      }
    }
  };
  // Reconstruct on ALL session change events
  pi.on("session_start", async (_event, ctx) => reconstructState(ctx));
  pi.on("session_switch", async (_event, ctx) => reconstructState(ctx));
  pi.on("session_fork", async (_event, ctx) => reconstructState(ctx));
  pi.on("session_tree", async (_event, ctx) => reconstructState(ctx));
  // Register the tool
  pi.registerTool({
    name: "{{tool_name}}",
    label: "{{Tool Label}}",
    description: "{{Description for LLM}}",
    parameters: Type.Object({
      action: StringEnum(["list", "add", "remove"] as const),
      text: Type.Optional(Type.String({ description: "Item text" })),
      id: Type.Optional(Type.Number({ description: "Item ID" })),
    }),
    async execute(toolCallId, params, signal, onUpdate, ctx) {
      // Respect cancellation before doing any work.
      if (signal?.aborted) {
        return { content: [{ type: "text", text: "Cancelled" }] };
      }
      switch (params.action) {
        case "list":
          return {
            content: [{ type: "text", text: items.length ? JSON.stringify(items) : "No items" }],
            // [...items] snapshots the array so later mutations don't
            // alias the stored details.
            details: { action: "list", items: [...items], nextId } as {{ToolDetails}},
          };
        case "add": {
          if (!params.text) throw new Error("text required for add");
          const item: {{ItemType}} = { id: nextId++ /* , ... */ };
          items.push(item);
          return {
            content: [{ type: "text", text: `Added #${item.id}` }],
            details: { action: "add", items: [...items], nextId } as {{ToolDetails}},
          };
        }
        case "remove": {
          if (params.id === undefined) throw new Error("id required for remove");
          const idx = items.findIndex(i => i.id === params.id);
          if (idx === -1) throw new Error(`Item #${params.id} not found`);
          items.splice(idx, 1);
          return {
            content: [{ type: "text", text: `Removed #${params.id}` }],
            details: { action: "remove", items: [...items], nextId } as {{ToolDetails}},
          };
        }
        default:
          throw new Error(`Unknown action: ${params.action}`);
      }
    },
    // Custom rendering
    renderCall(args, theme) {
      let text = theme.fg("toolTitle", theme.bold("{{tool_name}} "));
      text += theme.fg("muted", args.action);
      return new Text(text, 0, 0);
    },
    renderResult(result, { expanded }, theme) {
      const details = result.details as {{ToolDetails}} | undefined;
      if (!details) return new Text("", 0, 0);
      if (details.error) return new Text(theme.fg("error", details.error), 0, 0);
      return new Text(theme.fg("success", `${details.action} (${details.items.length} items)`), 0, 0);
    },
  });
  // User command to view state
  pi.registerCommand("{{command_name}}", {
    description: "View {{items}}",
    handler: async (_args, ctx) => {
      // Custom UI needs an interactive terminal; bail out in print mode.
      if (!ctx.hasUI) {
        ctx.ui.notify("Requires interactive mode", "error");
        return;
      }
      await ctx.ui.custom<void>((_tui, theme, _kb, done) => ({
        render(width: number): string[] {
          const lines = [
            "",
            truncateToWidth(theme.fg("accent", ` {{Items}} (${items.length}) `), width),
            "",
          ];
          for (const item of items) {
            lines.push(truncateToWidth(` #${item.id}`, width));
          }
          lines.push("", truncateToWidth(theme.fg("dim", " Press Escape to close"), width), "");
          return lines;
        },
        handleInput(data: string) {
          if (matchesKey(data, Key.escape)) done();
        },
        invalidate() {},
      }));
    },
  });
}

View file

@ -0,0 +1,58 @@
// SF — Extension template import path validation
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { readFileSync, readdirSync, statSync } from "node:fs";
import { join, dirname, relative } from "node:path";
import { fileURLToPath } from "node:url";
const __dirname = dirname(fileURLToPath(import.meta.url));
const skillRoot = join(__dirname, "..");
// Recursively collect every file path under `dir` (depth-first, in
// readdir order). Directories themselves are not included in the output.
function walk(dir: string): string[] {
  const out: string[] = [];
  // withFileTypes yields Dirent entries, avoiding a separate stat()
  // syscall per entry for plain files and directories.
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const full = join(dir, entry.name);
    // Dirent.isDirectory() does not follow symlinks, so fall back to
    // statSync for symlinks to preserve the follow-symlink behavior of
    // the per-entry statSync this replaces.
    const isDir =
      entry.isDirectory() || (entry.isSymbolicLink() && statSync(full).isDirectory());
    if (isDir) {
      out.push(...walk(full));
    } else {
      out.push(full);
    }
  }
  return out;
}
describe("extension templates use @singularity-forge/* imports", () => {
  const templates = ["extension-skeleton.ts", "stateful-tool-skeleton.ts"];
  // Load a sibling template file as UTF-8 text.
  const read = (name: string): string => readFileSync(join(__dirname, name), "utf-8");

  templates.forEach((template) => {
    it(`${template} uses @singularity-forge/pi-coding-agent (not @mariozechner)`, () => {
      const source = read(template);
      assert.ok(
        source.includes("@singularity-forge/pi-coding-agent"),
        `Expected @singularity-forge/pi-coding-agent import in ${template}`,
      );
      assert.ok(
        !source.includes("@mariozechner/"),
        `Found stale @mariozechner/ import in ${template}`,
      );
    });
  });

  it("extension-skeleton.ts uses @singularity-forge/pi-ai for StringEnum", () => {
    assert.ok(
      read("extension-skeleton.ts").includes("@singularity-forge/pi-ai"),
      "Expected @singularity-forge/pi-ai import",
    );
  });

  it("stateful-tool-skeleton.ts uses @singularity-forge/pi-tui", () => {
    assert.ok(
      read("stateful-tool-skeleton.ts").includes("@singularity-forge/pi-tui"),
      "Expected @singularity-forge/pi-tui import",
    );
  });

  it("no @mariozechner/ references anywhere in create-sf-extension/", () => {
    // This test file itself legitimately mentions the stale scope, so it
    // is excluded before scanning.
    const offenders = walk(skillRoot)
      .filter((file) => !file.endsWith("templates.test.ts"))
      .filter((file) => readFileSync(file, "utf-8").includes("@mariozechner/"))
      .map((file) => relative(skillRoot, file));
    assert.deepEqual(offenders, [], `Stale @mariozechner/ references found in: ${offenders.join(", ")}`);
  });
});

View file

@ -0,0 +1,57 @@
<required_reading>
Read the reference file for the specific capability being added:
- Tools → references/custom-tools.md
- Commands → references/custom-commands.md
- Events → references/events-reference.md
- UI → references/custom-ui.md
- Rendering → references/custom-rendering.md
- State → references/state-management.md
- System prompt → references/system-prompt-modification.md
</required_reading>
<process>
## Step 1: Identify the Extension
Locate the existing extension file. Check:
- `~/.sf/agent/extensions/` (global community extensions)
- `.sf/extensions/` (project-local)
Read the current extension code to understand its structure.
## Step 2: Add the Capability
Add the new registration/hook inside the existing `export default function (pi: ExtensionAPI)` body. Follow the patterns in the relevant reference file.
If the extension needs new imports, add them at the top of the file.
## Step 3: Handle Structural Changes
**Single file → Directory**: If the extension is outgrowing a single file:
1. Create `~/.sf/agent/extensions/my-extension/`
2. Move the file to `index.ts`
3. Extract helpers to separate files
**Adding npm dependencies**: If new packages are needed:
1. Create `package.json` in the extension directory
2. Add dependencies
3. Run `npm install`
4. Add `"pi": { "extensions": ["./index.ts"] }` to package.json
## Step 4: Test
```bash
/reload
```
Verify the new capability works alongside existing ones.
</process>
<success_criteria>
Capability addition is complete when:
- [ ] New capability added without breaking existing functionality
- [ ] All new imports resolve
- [ ] `/reload` succeeds
- [ ] New tool/command/hook tested with real invocation
</success_criteria>

View file

@ -0,0 +1,176 @@
<required_reading>
**Read these reference files before proceeding:**
1. references/extension-lifecycle.md
2. references/custom-tools.md (if building tools)
3. references/custom-commands.md (if building commands)
4. references/events-reference.md (if building event hooks)
5. references/key-rules-gotchas.md (always)
</required_reading>
<process>
## Step 1: Determine Scope and Placement
Ask the user:
- **Global** (`~/.sf/agent/extensions/`) — Available in all SF sessions
- **Project-local** (`.sf/extensions/`) — Available only in this project
## Step 2: Determine Extension Capabilities
Identify what the extension needs from the user's description:
| Capability | API | When |
|------------|-----|------|
| Custom tool (LLM-callable) | `pi.registerTool()` | LLM needs to perform new actions |
| Slash command | `pi.registerCommand()` | User needs direct actions |
| Event interception | `pi.on("event", ...)` | Block/modify tool calls, inject context, react to lifecycle |
| Custom UI | `ctx.ui.custom()` | Complex interactive displays |
| System prompt modification | `before_agent_start` event | Add per-turn instructions |
| Context filtering | `context` event | Modify messages sent to LLM |
| State persistence | `details` in tool results or `pi.appendEntry()` | Stateful behavior |
| Custom rendering | `renderCall` / `renderResult` | Control how tools appear in TUI |
| Provider management | `pi.registerProvider()` | Custom model endpoints |
| Keyboard shortcut | `pi.registerShortcut()` | Hotkey triggers |
## Step 3: Choose Extension Structure
**Directory with index.ts** — the standard pattern for all extensions:
```
~/.sf/agent/extensions/my-extension/
├── extension-manifest.json # Required — declares capabilities
├── index.ts # Entry point (must export default function)
├── tools.ts # Optional — tool implementations
└── utils.ts # Optional — shared utilities
```
**Package with dependencies** — when npm packages are needed:
```
~/.sf/agent/extensions/my-extension/
├── extension-manifest.json
├── package.json
├── src/index.ts
└── node_modules/
```
For packages, `package.json` needs:
```json
{
"name": "my-extension",
"dependencies": { ... },
"pi": { "extensions": ["./src/index.ts"] }
}
```
## Step 3b: Create the Extension Manifest
Every extension must include an `extension-manifest.json`:
```json
{
"id": "my-extension",
"name": "My Extension",
"version": "1.0.0",
"description": "What this extension does in one line",
"tier": "community",
"requires": { "platform": ">=2.29.0" },
"provides": {
"tools": ["my_tool"],
"commands": ["mycommand"],
"hooks": ["session_start"]
}
}
```
Only include non-empty arrays in `provides`. See `docs/extension-sdk/manifest-spec.md` for the full spec.
## Step 4: Write the Extension
Start with the skeleton:
```typescript
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
export default function (pi: ExtensionAPI) {
// Register events, tools, commands here
}
```
Then add capabilities based on Step 2. Reference the appropriate reference files for each capability.
**Tool registration pattern:**
```typescript
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
pi.registerTool({
name: "my_tool",
label: "My Tool",
description: "What this tool does (shown to LLM)",
parameters: Type.Object({
action: StringEnum(["list", "add"] as const),
text: Type.Optional(Type.String({ description: "Item text" })),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
if (signal?.aborted) return { content: [{ type: "text", text: "Cancelled" }] };
return {
content: [{ type: "text", text: "Result for LLM" }],
details: { data: "for rendering and state" },
};
},
});
```
**Command registration pattern:**
```typescript
pi.registerCommand("mycommand", {
description: "What this command does",
handler: async (args, ctx) => {
ctx.ui.notify(`Running with args: ${args}`, "info");
},
});
```
**Event hook pattern:**
```typescript
pi.on("tool_call", async (event, ctx) => {
if (event.toolName === "bash" && event.input.command?.includes("rm -rf")) {
return { block: true, reason: "Blocked dangerous command" };
}
});
```
## Step 5: Test the Extension
```bash
# Quick test without installing
sf -e ./path/to/my-extension.ts
# Or place in extensions dir and reload
/reload
```
Verify:
- Extension loads without errors (check SF startup output)
- Tools appear when LLM is asked to use them
- Commands respond to `/mycommand`
- Event hooks trigger at expected points
## Step 6: Iterate
Fix issues, add features, refine. Use `/reload` for hot-reload during development.
</process>
<success_criteria>
Extension creation is complete when:
- [ ] Extension directory created with index.ts and extension-manifest.json
- [ ] Manifest `provides` accurately lists all registered tools, commands, hooks, shortcuts
- [ ] All imports resolve (TypeBox, pi-ai, pi-coding-agent, pi-tui as needed)
- [ ] Tools use `StringEnum` for string enums (not `Type.Union`/`Type.Literal`)
- [ ] Tool output is truncated if variable-length
- [ ] State stored in `details` if extension is stateful
- [ ] `ctx.hasUI` checked before dialog methods
- [ ] Extension loads on `/reload` without errors
- [ ] Tools callable by LLM, commands by user
- [ ] Tested with at least one real invocation
</success_criteria>

View file

@ -0,0 +1,76 @@
<required_reading>
1. references/key-rules-gotchas.md
2. references/extension-lifecycle.md
</required_reading>
<process>
## Step 1: Identify the Symptom
| Symptom | Likely Cause |
|---------|--------------|
| Extension not loading | File not in discovery path, syntax error, missing export default |
| Tool not appearing for LLM | Tool not registered, `pi.setActiveTools()` excluding it, tool name conflict |
| Command not responding | Command not registered, name collision with built-in |
| Event not firing | Wrong event name, handler returning too early, handler error (logged but swallowed) |
| UI not rendering | `ctx.hasUI` is false (print mode), render lines exceed width, component not returning lines |
| State lost on restart | State not stored in `details` or `appendEntry`, not reconstructing on `session_start` |
| Google API errors | Using `Type.Union`/`Type.Literal` instead of `StringEnum` |
| Context overflow | Tool output not truncated |
| Deadlock/hang | Session control methods called from event handler (must be in command handler only) |
| Render garbage | Theme imported directly instead of from callback, missing `truncateToWidth()` |
## Step 2: Check Extension Loading
```bash
# Test in isolation
sf -e ./path/to/extension.ts
# Check SF startup output for errors
# Extension errors are logged but don't crash SF
```
## Step 3: Verify File Location
Community extensions must be in auto-discovery paths:
- `~/.sf/agent/extensions/*.ts`
- `~/.sf/agent/extensions/*/index.ts`
- `.sf/extensions/*.ts`
- `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package.
The file must `export default function(pi: ExtensionAPI) { ... }`.
## Step 4: Check for Common Mistakes
Read `../references/key-rules-gotchas.md` and verify each rule against the extension code.
## Step 5: Add Debugging
```typescript
// Temporary: log to stderr (visible in SF output)
console.error("[my-ext] Loading...");
pi.on("session_start", async (_event, ctx) => {
console.error("[my-ext] Session started");
ctx.ui.notify("Extension loaded", "info");
});
```
## Step 6: Fix and Reload
Apply the fix and test:
```
/reload
```
</process>
<success_criteria>
Debugging is complete when:
- [ ] Root cause identified
- [ ] Fix applied
- [ ] Extension loads and functions correctly after `/reload`
- [ ] No regression in existing functionality
</success_criteria>

View file

@ -0,0 +1,153 @@
---
name: forensics
description: Post-mortem a failed sf auto-mode run. Traces from symptom to root cause using `.sf/activity/*.jsonl`, `.sf/journal/YYYY-MM-DD.jsonl`, `.sf/metrics.json`, and `.sf/auto.lock`. Produces a filing-ready bug report with file:line references and a concrete fix suggestion. Use when asked to "forensics", "post-mortem", "why did auto-mode fail", "trace the stuck loop", "debug the crash", after `/sf forensics` is invoked, or when a session ended in an unexpected terminal state. Reads existing artifacts — does NOT re-run anything.
---
<objective>
Turn scattered sf runtime artifacts into one coherent cause chain. The deliverable is a GitHub-issue-ready report that names the file and line where the bug lives, cites the evidence, and proposes a fix. Forensics is archaeology, not re-run — no modifying state, no triggering commands, just reading the paper trail.
</objective>
<context>
sf persists a lot of runtime evidence under `.sf/`:
- `activity/{seq}-{unitType}-{unitId}.jsonl` — full tool-call and message stream per unit
- `journal/YYYY-MM-DD.jsonl` — iteration-level events (dispatch-match, stuck-detected, guard-block, unit-start/end, terminal)
- `metrics.json` — token/cost ledger; duplicate `type/id` entries indicate a stuck loop
- `auto.lock` — JSON snapshot of the currently-owning PID; stale lock = crash mid-unit
- `forensics/` — saved prior reports
- `debug/` — debug logs if enabled
- `runtime/paused-session.json` — serialized session when auto-mode paused
- `doctor-history.jsonl` — doctor check history
The `/sf forensics` command pre-computes a forensic report with anomalies flagged. This skill is the manual investigation that goes deeper, or runs when the automated report isn't enough.
Invocation points:
- `/sf forensics` has been run and user wants deeper analysis
- Auto-mode exited unexpectedly, no obvious cause
- Same unit dispatched multiple times (stuck loop suspected)
- A session crashed and `auto.lock` is stale
- User reports "it just stopped" or "it did the wrong thing"
</context>
<core_principle>
**READ-ONLY.** Forensics touches no live state. Non-mutating inspection commands (e.g., `ps`, `top -b`, `cat /proc/*`) are allowed for checking process status or reading system files. Strictly prohibited: `sf_*` writes, commands that modify state, executing binaries that produce side effects, writing to files (outside the final report), or re-running the failed unit. The evidence must stay pristine for future investigations.
**SYMPTOM → ROOT CAUSE, WITH CITATIONS.** Every claim in the report is backed by an artifact path and either a line number or a JSONL field. "The loop got stuck because of a race" is not useful; "`.sf/journal/2026-04-19.jsonl:142` shows `stuck-detected` with flowId X, caused by `dispatch-guard.ts:87` returning the same unit after `unit-end`" is.
**PRE-PARSED LEADS, NOT CONCLUSIONS.** If `/sf forensics` has surfaced anomalies, treat them as hypotheses to verify, not answers.
</core_principle>
<process>
## Step 1: Locate the evidence
Read what's in `.sf/`:
1. `auto.lock` — is it stale? Check PID against `ps` (read-only inspection, allowed). Stale = crash.
2. Most recent `.sf/activity/*.jsonl` — sort by mtime, newest first. That's the last unit that ran.
3. Today's `.sf/journal/YYYY-MM-DD.jsonl` — the iteration-level view.
4. `.sf/metrics.json` — does any `type/id` appear more than once? (stuck loop signal)
5. `.sf/runtime/paused-session.json` — if present, what was the pause reason?
## Step 2: Reconstruct the failure from the activity log
Activity JSONL format:
- Each line is `{type: "message", message: {...}}`.
- `message.role: "assistant"``content[]` with `type: "text"` reasoning and `type: "toolCall"` invocations.
- `message.role: "toolResult"``{toolCallId, toolName, isError, content}`.
- `usage` on assistant messages tracks tokens and cost.
To trace a failure:
1. Search for `isError: true` tool results in the last activity log. That's usually the proximate symptom.
2. Walk backwards to the assistant message that made the call. Read the `text` content — that's the agent's reasoning at the moment of failure.
3. Keep walking back. Find where the agent's model of the state diverged from reality.
## Step 3: Cross-reference the journal
For each symptom from the activity log, find the matching journal events:
- `stuck-detected` + same `flowId` → the loop detected repetition. `data.reason` says why.
- `guard-block` → a dispatch guard refused to run a unit. Check `data.reason` and trace to `dispatch-guard.ts` logic.
- `unit-end` followed by another `unit-start` for the same `unitId` → re-dispatch. If tied to `stuck-detected`, the artifact verification failed after the unit succeeded.
- `terminal` → auto-mode decided to stop. `data.reason` tells you why.
Use `flowId` to reconstruct one iteration; use `causedBy` to follow causal chains across iterations.
## Step 4: Name the root cause
A good root cause is:
- Specific: a function, a state transition, a missing guard.
- Falsifiable: if we changed X, would the failure go away?
- Sourced: cites a file and (where applicable) a line number.
Bad root cause: "Auto-mode got stuck in a loop." Good root cause: "After slice completion, `auto-unit-closeout.ts` emits `unit-end` before `auto-post-unit.ts` updates the roadmap checkbox. The next `iteration-start` finds the same unit `[ ]` and re-dispatches — `dispatch-guard.ts:42` has no check against the freshly-ended `unitId`."
Consult the source map in `src/resources/extensions/sf/prompts/forensics.md` to map symptoms to the likely domain files.
## Step 5: Propose a fix
For the root cause:
- Which file and function holds the bug?
- What minimal change would eliminate it?
- What test would have caught it? Can one be added?
- Is this a regression from a recent commit? (Run `git log -- path/to/file.ts` mentally; do NOT run git commands that could modify state.)
## Step 6: Write the report
Format the output as a GitHub-issue-ready report:
```markdown
## Symptom
<what the user saw — quote the error or describe the observed behavior>
## Evidence Trail
1. `.sf/auto.lock`<state: stale / fresh>
2. `.sf/activity/042-slice-S02.jsonl:128`<isError: true from `sf_task_complete`>
3. `.sf/journal/2026-04-19.jsonl:87`<stuck-detected flowId 7a3c>
4. `.sf/metrics.json`<unit type/id "slice/S02" appears 3 times>
## Root Cause
<specific named cause — file, function, state transition>
`src/resources/extensions/sf/auto-unit-closeout.ts:<line>`: <exactly what goes wrong>
## Proposed Fix
<minimal change — file, function, what to change>
## Test
<what test would have caught this; whether one should be added>
## Confidence
<high / medium / low> — <what would change this confidence>
```
Offer to file this as a GitHub issue via `mcp__github__issue_write` — explicit confirmation required per the outward-action rule. Also save a copy to `.sf/forensics/<slug>.md` for future reference.
</process>
<anti_patterns>
- **Running any `sf_*` write tool during forensics.** Evidence stays pristine.
- **Re-running the auto-mode loop to "reproduce."** That overwrites the activity log. Read the existing one.
- **Vague root cause.** "There's a race" is not a root cause. Name the race.
- **No citations.** Every claim gets an artifact path.
- **Skipping the journal.** The journal is the only view that shows dispatch-level decisions.
- **Auto-filing the GitHub issue.** Outward actions need confirmation.
</anti_patterns>
<success_criteria>
- [ ] The symptom is quoted, not paraphrased.
- [ ] Every claim in the evidence trail cites a file and a line or field.
- [ ] The root cause names a specific file, function, or state transition.
- [ ] The proposed fix is minimal and falsifiable.
- [ ] Confidence is stated honestly.
- [ ] Report is saved under `.sf/forensics/` even if not filed as an issue.
</success_criteria>

View file

@ -0,0 +1,184 @@
# Build From Spec
End-to-end workflow: take a product idea or specification, produce working software.
## Prerequisites
- `sf` CLI installed (`npm install -g singularity-forge`)
- A directory for the project (can be empty)
- Git initialized in the directory
## Process
### Step 1: Prepare the project directory
```bash
PROJECT_DIR="/tmp/my-project-name"
mkdir -p "$PROJECT_DIR"
cd "$PROJECT_DIR"
git init 2>/dev/null # SF needs a git repo
```
### Step 2: Write the spec file
Write a spec file that describes what to build. More detail = better results.
```bash
cat > spec.md << 'SPEC'
# Product Name
## What
[Concrete description of what to build]
## Requirements
- [Specific, testable requirement 1]
- [Specific, testable requirement 2]
- [Specific, testable requirement 3]
## Technical Constraints
- [Language, framework, or platform requirements]
- [External services or APIs involved]
- [Performance or security requirements]
## Out of Scope
- [Things explicitly NOT included]
SPEC
```
**Spec quality matters.** Vague specs produce vague results. Include:
- What the user can DO when it's done (not what code to write)
- Technical constraints (language, framework, Node version)
- What's out of scope (prevents scope creep)
### Step 3: Launch the build
**Fire-and-forget (simplest — SF does everything):**
```bash
cd "$PROJECT_DIR"
RESULT=$(sf headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null)
EXIT=$?
```
`--timeout 0` disables the timeout for long builds. `--auto` chains milestone creation into execution.
**With budget limit:**
```bash
# Use step-by-step mode with budget checks instead of auto
# See workflows/step-by-step.md
```
**For CI or ecosystem runs (no user config):**
```bash
RESULT=$(sf headless --bare --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null)
EXIT=$?
```
### Step 4: Handle the result
```bash
case $EXIT in
0)
# Success — verify deliverables
STATUS=$(echo "$RESULT" | jq -r '.status')
COST=$(echo "$RESULT" | jq -r '.cost.total')
COMMITS=$(echo "$RESULT" | jq -r '.commits | length')
echo "Build complete: $STATUS, cost: \$$COST, commits: $COMMITS"
# Inspect what was built
sf headless query | jq '.state.progress'
# Check the actual files
ls -la "$PROJECT_DIR"
;;
1)
# Error — inspect and decide
echo "Build failed"
echo "$RESULT" | jq '{status: .status, phase: .phase}'
# Check state for details
sf headless query | jq '.state'
;;
10)
# Blocked — needs intervention
echo "Build blocked — needs human input"
sf headless query | jq '{phase: .state.phase, blockers: .state.blockers}'
# Options: steer, supply answers, or escalate
# See workflows/monitor-and-poll.md for blocker handling
;;
11)
echo "Build was cancelled"
;;
esac
```
### Step 5: Verify deliverables
After a successful build, verify the output:
```bash
cd "$PROJECT_DIR"
# Check project state
sf headless query | jq '{
phase: .state.phase,
progress: .state.progress,
cost: .cost.total
}'
# Check git log for what was built
git log --oneline
# Run the project's own tests if they exist
[ -f package.json ] && npm test 2>/dev/null
[ -f Makefile ] && make test 2>/dev/null
```
## Complete Example
```bash
# 1. Setup
mkdir -p /tmp/todo-api && cd /tmp/todo-api && git init
# 2. Write spec
cat > spec.md << 'SPEC'
# Todo API
Build a REST API for managing todo items using Node.js and Express.
## Requirements
- GET /todos — list all todos
- POST /todos — create a todo (title, completed)
- PUT /todos/:id — update a todo
- DELETE /todos/:id — delete a todo
- Todos stored in-memory (no database)
- Input validation with descriptive error messages
- Health check endpoint at GET /health
## Technical Constraints
- Node.js with ESM modules
- Express framework
- No external database — in-memory array
- Port configurable via PORT env var (default 3000)
## Out of Scope
- Authentication
- Persistent storage
- Frontend
SPEC
# 3. Launch
RESULT=$(sf headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null)
EXIT=$?
# 4. Report
if [ $EXIT -eq 0 ]; then
COST=$(echo "$RESULT" | jq -r '.cost.total')
echo "Build complete (\$$COST)"
echo "Files created:"
find . -not -path './.sf/*' -not -path './.git/*' -type f
else
echo "Build failed (exit $EXIT)"
echo "$RESULT" | jq .
fi
```

View file

@ -0,0 +1,187 @@
# Monitor and Poll
Check status of a SF project, handle blockers, track costs, and decide next actions.
## Checking Project State
The `query` command is your primary monitoring tool. It's instant (~50ms), costs nothing (no LLM), and returns the full project snapshot.
```bash
cd /path/to/project
sf headless query
```
### Key fields to inspect
```bash
# Overall status
sf headless query | jq '{
phase: .state.phase,
milestone: .state.activeMilestone.id,
slice: .state.activeSlice.id,
task: .state.activeTask.id,
progress: .state.progress,
cost: .cost.total
}'
# What should happen next
sf headless query | jq '.next'
# Returns: { "action": "dispatch", "unitType": "execute-task", "unitId": "M001/S01/T01" }
# Is it done?
sf headless query | jq '.state.phase'
# "complete" = done, "blocked" = needs you, anything else = in progress
```
### Phase meanings
| Phase | Meaning | Your action |
|-------|---------|-------------|
| `pre-planning` | Milestone exists, no slices planned yet | Run `auto` or `next` |
| `needs-discussion` | Ambiguities need resolution | Supply answers or run with defaults |
| `discussing` | Discussion in progress | Wait |
| `researching` | Codebase/library research | Wait |
| `planning` | Creating task plans | Wait |
| `executing` | Writing code | Wait |
| `verifying` | Checking must-haves | Wait |
| `summarizing` | Recording what happened | Wait |
| `advancing` | Moving to next task/slice | Wait |
| `evaluating-gates` | Quality checks before execution | Wait or run `next` |
| `validating-milestone` | Final milestone checks | Wait |
| `completing-milestone` | Archiving and cleanup | Wait |
| `complete` | Done | Verify deliverables |
| `blocked` | Needs human input | Handle blocker (see below) |
| `paused` | Explicitly paused | Resume with `auto` |
## Handling Blockers
When exit code is `10` or phase is `blocked`:
```bash
# 1. Understand the blocker
sf headless query | jq '{phase: .state.phase, blockers: .state.blockers, nextAction: .state.nextAction}'
# 2. Option A: Steer around it
sf headless steer "Skip the database dependency, use in-memory storage instead"
# 3. Option B: Supply pre-built answers
cat > fix.json << 'EOF'
{
"questions": { "blocked_question_id": "workaround_option" },
"defaults": { "strategy": "first_option" }
}
EOF
sf headless --answers fix.json auto
# 4. Option C: Force a specific phase
sf headless dispatch replan
# 5. Option D: Escalate to user
echo "SF build blocked. Phase: $(sf headless query | jq -r '.state.phase')"
echo "Manual intervention required."
```
## Cost Tracking
```bash
# Current cumulative cost
sf headless query | jq '.cost.total'
# Per-worker breakdown
sf headless query | jq '.cost.workers'
# After a step (from HeadlessJsonResult)
RESULT=$(sf headless --output-format json next 2>/dev/null)
echo "$RESULT" | jq '.cost'
```
### Budget enforcement pattern
```bash
MAX_BUDGET=15.00
check_budget() {
TOTAL=$(sf headless query | jq -r '.cost.total')
OVER=$(echo "$TOTAL > $MAX_BUDGET" | bc -l)
if [ "$OVER" = "1" ]; then
echo "Budget exceeded: \$$TOTAL > \$$MAX_BUDGET"
sf headless stop
return 1
fi
return 0
}
```
## Poll-and-React Loop
For agents that need to periodically check on a build:
```bash
cd /path/to/project
poll_project() {
STATE=$(sf headless query 2>/dev/null)
if [ -z "$STATE" ]; then
echo "NO_PROJECT"
return
fi
PHASE=$(echo "$STATE" | jq -r '.state.phase')
COST=$(echo "$STATE" | jq -r '.cost.total')
PROGRESS=$(echo "$STATE" | jq -r '"\(.state.progress.milestones.done)/\(.state.progress.milestones.total) milestones, \(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"')
case "$PHASE" in
complete)
echo "COMPLETE cost=\$$COST progress=$PROGRESS"
;;
blocked)
BLOCKER=$(echo "$STATE" | jq -r '.state.nextAction // "unknown"')
echo "BLOCKED reason=$BLOCKER cost=\$$COST"
;;
*)
NEXT=$(echo "$STATE" | jq -r '.next.action // "none"')
echo "IN_PROGRESS phase=$PHASE next=$NEXT cost=\$$COST progress=$PROGRESS"
;;
esac
}
```
## Resuming Work
If a build was interrupted or you need to continue:
```bash
cd /path/to/project
# Check current state
sf headless query | jq '.state.phase'
# Resume from where it left off
sf headless --output-format json auto 2>/dev/null
# Or resume a specific session
sf headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null
```
## Reading Build Artifacts
After completion, inspect what SF produced:
```bash
cd /path/to/project
# Project summary
cat .sf/PROJECT.md
# What was decided
cat .sf/DECISIONS.md
# Requirements and their validation status
cat .sf/REQUIREMENTS.md
# Milestone summary
cat .sf/milestones/M001-*/M001-*-SUMMARY.md 2>/dev/null
# Git history (SF commits per-slice)
git log --oneline
```

View file

@ -0,0 +1,156 @@
# Step-by-Step Execution
Run SF one unit at a time with decision points between steps. Use this when you need
control over execution — budget enforcement, progress reporting, conditional logic,
or the ability to steer mid-build.
## When to use this vs `auto`
| Approach | Use when |
|----------|----------|
| `auto` | You trust the build, just want the result |
| `next` loop | You need budget checks, progress updates, or intervention points |
## Core Loop
```bash
cd /path/to/project
MAX_BUDGET=20.00
TOTAL_COST=0
while true; do
# Run one unit
RESULT=$(sf headless --output-format json next 2>/dev/null)
EXIT=$?
# Parse result
STATUS=$(echo "$RESULT" | jq -r '.status')
STEP_COST=$(echo "$RESULT" | jq -r '.cost.total')
PHASE=$(echo "$RESULT" | jq -r '.phase // empty')
SESSION_ID=$(echo "$RESULT" | jq -r '.sessionId // empty')
# Handle exit codes
case $EXIT in
0) ;; # success — continue
1)
echo "Step failed: $STATUS"
break
;;
10)
echo "Blocked — needs intervention"
sf headless query | jq '.state'
break
;;
11)
echo "Cancelled"
break
;;
esac
# Check if milestone complete
CURRENT_PHASE=$(sf headless query | jq -r '.state.phase')
if [ "$CURRENT_PHASE" = "complete" ]; then
TOTAL_COST=$(sf headless query | jq -r '.cost.total')
echo "Milestone complete. Total cost: \$$TOTAL_COST"
break
fi
# Budget check
TOTAL_COST=$(sf headless query | jq -r '.cost.total')
OVER=$(echo "$TOTAL_COST > $MAX_BUDGET" | bc -l)
if [ "$OVER" = "1" ]; then
echo "Budget limit (\$$MAX_BUDGET) exceeded at \$$TOTAL_COST"
sf headless stop
break
fi
# Progress report
PROGRESS=$(sf headless query | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"')
echo "Step done ($STATUS). Phase: $CURRENT_PHASE, Progress: $PROGRESS, Cost: \$$TOTAL_COST"
done
```
## Step-by-Step with Spec Creation
Complete flow from idea to working code with full control:
```bash
# 1. Setup
PROJECT_DIR="/tmp/my-project"
mkdir -p "$PROJECT_DIR" && cd "$PROJECT_DIR" && git init 2>/dev/null
# 2. Write spec
cat > spec.md << 'SPEC'
[Your spec here]
SPEC
# 3. Create the milestone (planning only, no execution)
RESULT=$(sf headless --output-format json --context spec.md new-milestone 2>/dev/null)
EXIT=$?
if [ $EXIT -ne 0 ]; then
echo "Milestone creation failed"
echo "$RESULT" | jq .
exit 1
fi
echo "Milestone created. Starting execution..."
# 4. Execute step-by-step
STEP=0
while true; do
STEP=$((STEP + 1))
RESULT=$(sf headless --output-format json next 2>/dev/null)
EXIT=$?
[ $EXIT -ne 0 ] && break
PHASE=$(sf headless query | jq -r '.state.phase')
COST=$(sf headless query | jq -r '.cost.total')
echo "Step $STEP complete. Phase: $PHASE, Cost: \$$COST"
[ "$PHASE" = "complete" ] && break
done
echo "Build finished in $STEP steps"
```
## Intervention Patterns
### Steer mid-execution
If you detect the build going in the wrong direction:
```bash
# Check what's happening
sf headless query | jq '{phase: .state.phase, task: .state.activeTask}'
# Redirect
sf headless steer "Use SQLite instead of PostgreSQL for storage"
# Continue
sf headless --output-format json next 2>/dev/null
```
### Skip a stuck unit
```bash
sf headless skip
sf headless --output-format json next 2>/dev/null
```
### Undo last completed unit
```bash
sf headless undo --force
sf headless --output-format json next 2>/dev/null
```
### Force a specific phase
```bash
sf headless dispatch replan # Re-plan the current slice
sf headless dispatch execute # Skip to execution
sf headless dispatch uat # Jump to user acceptance testing
```

View file

@ -0,0 +1,67 @@
import assert from "node:assert/strict";
import test from "node:test";

// Top-level await import keeps the .ts source loadable under the node test
// runner (type stripping) without a separate build step for this spec file.
// The redundant `filterInitialSfHeader: filterInitialSfHeader` rename in the
// original destructuring served no purpose and is dropped.
const { filterInitialSfHeader } = await import(
  "../../web/lib/initial-sf-header-filter.ts"
);
// ASCII-art banner lines used as the synthetic startup header in the tests
// below — presumably matching the banner the SF CLI prints at startup;
// TODO confirm against the expected header in initial-sf-header-filter.ts.
// `as const` keeps this a readonly tuple of literal strings.
const SF_LOGO_LINES = [
  " ██████╗ ███████╗██████╗ ",
  " ██╔════╝ ██╔════╝██╔══██╗",
  " ██║ ███╗███████╗██║ ██║",
  " ██║ ██║╚════██║██║ ██║",
  " ╚██████╔╝███████║██████╔╝",
  " ╚═════╝ ╚══════╝╚═════╝ ",
] as const;
test("filterInitialSfHeader strips a plain startup banner and keeps real terminal content", () => {
  // A complete banner (logo + version + blank line) followed by real output:
  // the filter should report a match and surface only the real output.
  const realContent = "Warning: Google Search is not configured.";
  const chunk = [
    ...SF_LOGO_LINES,
    " Singularity Forge v2.33.1",
    "",
    realContent,
  ].join("\n");

  const outcome = filterInitialSfHeader(chunk);

  assert.equal(outcome.status, "matched");
  assert.equal(outcome.text, realContent);
});
test("filterInitialSfHeader strips ANSI-colored startup banner output", () => {
  // Same banner, but wrapped in SGR escape sequences and CRLF line endings,
  // the way a real color-capable terminal stream would deliver it.
  const cyan = "\u001b[36m";
  const reset = "\u001b[39m";
  const bold = "\u001b[1m";
  const boldReset = "\u001b[22m";
  const dim = "\u001b[2m";
  const dimReset = "\u001b[22m";
  const warning = "Warning: terminal content starts here.\r\n";

  let raw = "";
  for (const line of SF_LOGO_LINES) {
    raw += `${cyan}${line}${reset}\r\n`;
  }
  raw += ` ${bold}Singularity Forge${boldReset} ${dim}v2.33.1${dimReset}\r\n\r\n`;
  raw += warning;

  const result = filterInitialSfHeader(raw);

  assert.equal(result.status, "matched");
  assert.equal(result.text, warning);
});
test("filterInitialSfHeader waits for more data when the startup banner is incomplete", () => {
  // Only the first three logo lines have arrived — the filter should ask
  // for more data rather than matching or passing anything through.
  const firstChunk = SF_LOGO_LINES.slice(0, 3).join("\n");

  assert.deepEqual(filterInitialSfHeader(firstChunk), {
    status: "needs-more",
    text: "",
  });
});
test("filterInitialSfHeader passes normal terminal output through untouched", () => {
  // Output with no banner at all must flow through byte-for-byte.
  const ordinaryOutput = "Warning: already in the shell\r\n$ ";

  const outcome = filterInitialSfHeader(ordinaryOutput);

  assert.equal(outcome.status, "passthrough");
  assert.equal(outcome.text, ordinaryOutput);
});

80
web/proxy.ts Normal file
View file

@ -0,0 +1,80 @@
import { NextResponse, type NextRequest } from "next/server"
/**
* Next.js proxy validates bearer token and origin on all API routes.
*
* The SF_WEB_AUTH_TOKEN env var is set at server launch. Every /api/* request
* must carry a matching `Authorization: Bearer <token>` header. EventSource
* (SSE) connections may use the `_token` query parameter instead since the
* EventSource API cannot set custom headers.
*
* Additionally, if an `Origin` header is present, it must match the expected
* localhost origin to prevent cross-site request forgery.
*/
/**
 * Gate all /api/* requests behind a bearer-token check plus an Origin
 * allow-list. Non-API routes pass through untouched, and the whole gate is
 * disabled when SF_WEB_AUTH_TOKEN was never configured.
 *
 * @param request incoming request from the Next.js proxy/middleware hook
 * @returns a 401/403 JSON response on rejection, otherwise NextResponse.next()
 */
export function proxy(request: NextRequest): NextResponse | undefined {
  const { pathname } = request.nextUrl

  // Only gate API routes
  if (!pathname.startsWith("/api/")) return NextResponse.next()

  const expectedToken = process.env.SF_WEB_AUTH_TOKEN
  if (!expectedToken) {
    // If no token was configured (e.g. dev mode without launch harness),
    // allow everything — the server didn't opt into auth.
    return NextResponse.next()
  }

  // ── Origin / CORS check ────────────────────────────────────────────
  // Only enforced when the browser sends an Origin header; header-less
  // clients (curl, same-origin GETs) fall through to the token check.
  const origin = request.headers.get("origin")
  if (origin) {
    const host = process.env.SF_WEB_HOST || "127.0.0.1"
    const port = process.env.SF_WEB_PORT || "3000"
    // Default: localhost origin for the launched host:port
    const allowed = new Set([`http://${host}:${port}`])

    // Browsers treat `localhost`, `127.0.0.1`, and `[::1]` as DISTINCT
    // origins. If the server was launched on one loopback spelling but the
    // user browses via another, every API call would 403 — so when the host
    // is a loopback address, accept all loopback spellings on the same port.
    const loopbackHosts = ["127.0.0.1", "localhost", "[::1]"]
    if (loopbackHosts.includes(host)) {
      for (const alias of loopbackHosts) {
        allowed.add(`http://${alias}:${port}`)
      }
    }

    // SF_WEB_ALLOWED_ORIGINS lets users whitelist additional origins for
    // secure tunnel setups (Tailscale Serve, Cloudflare Tunnel, ngrok, etc.)
    const extra = process.env.SF_WEB_ALLOWED_ORIGINS
    if (extra) {
      for (const entry of extra.split(",")) {
        const trimmed = entry.trim()
        if (trimmed) allowed.add(trimmed)
      }
    }

    if (!allowed.has(origin)) {
      return NextResponse.json(
        { error: "Forbidden: origin mismatch" },
        { status: 403 },
      )
    }
  }

  // ── Bearer token check ─────────────────────────────────────────────
  let token: string | null = null

  // 1. Authorization header (preferred)
  const authHeader = request.headers.get("authorization")
  if (authHeader?.startsWith("Bearer ")) {
    token = authHeader.slice(7)
  }

  // 2. Query parameter fallback for EventSource / SSE — the EventSource API
  //    cannot set custom request headers, so SSE clients pass `?_token=`.
  if (!token) {
    token = request.nextUrl.searchParams.get("_token")
  }

  if (!token || token !== expectedToken) {
    return NextResponse.json(
      { error: "Unauthorized" },
      { status: 401 },
    )
  }

  return NextResponse.next()
}
// Next.js matcher: run this proxy only for API routes, so pages, static
// assets, and framework internals never pay for the auth checks.
export const config = {
  matcher: "/api/:path*",
}