fix: stabilize sf auto and subagent routing

This commit is contained in:
Mikael Hugo 2026-04-30 21:55:17 +02:00
parent da324da27e
commit 78be73fcb8
77 changed files with 5395 additions and 11 deletions

151
.github/workflows/dev-publish.yml vendored Normal file
View file

@ -0,0 +1,151 @@
# singularity-forge + CI: manual @dev channel publish with approval gate
name: Dev Publish
# Manual pre-release. Click "Run workflow" in the Actions tab to stamp a
# version and publish @dev to npm. Gated by the `dev` GitHub Environment
# (configure reviewers in repo Settings -> Environments).
on:
workflow_dispatch:
inputs:
ref:
description: 'Branch or SHA to publish as @dev'
required: false
default: 'main'
concurrency:
group: dev-publish-${{ github.event.inputs.ref }}
cancel-in-progress: false
permissions:
contents: read
packages: write
jobs:
dev-publish:
name: Dev Publish
runs-on: ubuntu-latest
environment: dev
outputs:
dev-version: ${{ steps.stamp.outputs.version }}
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
token: ${{ secrets.RELEASE_PAT }}
fetch-depth: 0
- name: Mark workspace safe for git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Install web host dependencies
run: npm --prefix web ci
- name: Cache Next.js build
uses: actions/cache@v4
with:
path: web/.next/cache
key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }}
restore-keys: |
nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-
nextjs-${{ runner.os }}-
- name: Build core
run: npm run build:core
- name: Build web host
run: npm run build:web-host
- name: Stamp dev version and sync platform packages
id: stamp
env:
VERSION_CHANNEL: dev
run: |
npm run pipeline:version-stamp
npm run sync-platform-versions
echo "version=$(node -e 'process.stdout.write(require("./package.json").version)')" >> "$GITHUB_OUTPUT"
- name: Smoke test
run: |
chmod +x dist/loader.js
export SF_SMOKE_BINARY="$(pwd)/dist/loader.js"
npm run test:smoke
- name: Publish @dev
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
VERSION=$(node -e 'process.stdout.write(require("./package.json").version)')
if npm view "singularity-forge@${VERSION}" version 2>/dev/null; then
echo "Version ${VERSION} already published — moving @dev tag"
npm dist-tag add "singularity-forge@${VERSION}" dev
else
npm publish --tag dev
fi
echo "Verifying singularity-forge@${VERSION} is reachable on npm..."
for i in 1 2 3 4 5; do
npm view "singularity-forge@${VERSION}" version 2>/dev/null && echo "Confirmed: singularity-forge@${VERSION} is live." && exit 0
echo "Attempt $i: not yet visible — waiting 10s..."
sleep 10
done
echo "::error::Publish step succeeded but singularity-forge@${VERSION} is not reachable on npm after 50s. Check NPM_TOKEN permissions and registry config."
exit 1
dev-verify:
name: Dev Verify (installed package)
needs: dev-publish
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install published singularity-forge@dev globally (with registry propagation retry)
env:
DEV_VERSION: ${{ needs.dev-publish.outputs.dev-version }}
run: |
for i in 1 2 3 4 5 6; do
npm install -g "singularity-forge@${DEV_VERSION}" && exit 0
echo "Attempt $i failed — waiting 10s for npm registry propagation..."
sleep 10
done
echo "::error::Failed to install singularity-forge@${DEV_VERSION} after 6 attempts."
echo "::error::Recommended actions: (1) investigate the failing step above, (2) if the version exists on npm, deprecate it with 'npm deprecate singularity-forge@${DEV_VERSION} \"broken build; see Actions run\"', (3) cut a fix and re-run Dev Publish."
exit 1
- name: Run smoke tests (against installed binary)
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:smoke
- name: Install repo dependencies (for regression harness)
run: npm ci
- name: Run live regression tests (against installed binary)
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:live-regression
- name: Warn on verify failure
if: failure()
env:
DEV_VERSION: ${{ needs.dev-publish.outputs.dev-version }}
run: |
echo "::error::Post-publish verification failed for singularity-forge@${DEV_VERSION}."
echo "::error::Recommended actions: (1) investigate the failing step above, (2) if the version exists on npm, deprecate it with 'npm deprecate singularity-forge@${DEV_VERSION} \"broken build; see Actions run\"', (3) cut a fix and re-run Dev Publish."
exit 1

86
.github/workflows/forensics-check.yml vendored Normal file
View file

@ -0,0 +1,86 @@
name: Forensics Check
on:
issues:
types: [opened, edited]
permissions:
issues: write
jobs:
check-forensics:
# Only run on bug reports
if: contains(github.event.issue.labels.*.name, 'bug')
runs-on: blacksmith-4vcpu-ubuntu-2404
steps:
- name: Check for forensics output and comment if missing
uses: actions/github-script@v7
with:
script: |
const body = context.payload.issue.body || '';
const issueNumber = context.payload.issue.number;
const forensicsMarker = 'Auto-generated by `/sf forensics`';
if (body.includes(forensicsMarker)) {
core.info('Forensics output found in issue body — no comment needed.');
return;
}
// Check comments too — reporter may have added it after opening
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
});
const forensicsInComments = comments.data.some(c =>
c.body && c.body.includes(forensicsMarker)
);
if (forensicsInComments) {
core.info('Forensics output found in comments — no comment needed.');
return;
}
// Avoid duplicate bot comments
const botMarker = '<!-- sf-forensics-check -->';
const alreadyCommented = comments.data.some(c =>
c.user.type === 'Bot' && c.body && c.body.includes(botMarker)
);
if (alreadyCommented) {
core.info('Forensics request comment already posted — skipping duplicate.');
return;
}
const comment = [
botMarker,
'',
'Thanks for the bug report! To help us investigate, please run `/sf forensics` in your project and paste the output here.',
'',
'```bash',
'# In your project directory:',
'/sf forensics',
'```',
'',
'The forensics output includes git history analysis, session traces, stuck-loop detection, and cost data that significantly speeds up diagnosis.',
'',
'---',
'*This is an automated check. If `/sf forensics` is not available in your version, you can skip this step.*',
].join('\n');
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
body: comment,
});
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['needs-forensics'],
});
core.info('Posted forensics request comment.');

143
.github/workflows/next-publish.yml vendored Normal file
View file

@ -0,0 +1,143 @@
name: Next Publish
# Manual pre-release. Click "Run workflow" in the Actions tab to stamp a
# version and publish @next to npm. Optional approval gate via the `next`
# GitHub Environment (configure reviewers in repo Settings -> Environments).
on:
workflow_dispatch:
inputs:
ref:
description: 'Branch or SHA to publish as @next'
required: false
default: 'next'
concurrency:
group: next-publish-${{ github.event.inputs.ref }}
cancel-in-progress: false
permissions:
contents: read
packages: write
jobs:
next-publish:
name: Next Publish
runs-on: ubuntu-latest
environment: next
outputs:
next-version: ${{ steps.stamp.outputs.version }}
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
token: ${{ secrets.RELEASE_PAT }}
fetch-depth: 0
- name: Mark workspace safe for git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Install web host dependencies
run: npm --prefix web ci
- name: Cache Next.js build
uses: actions/cache@v4
with:
path: web/.next/cache
key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }}
restore-keys: |
nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-
nextjs-${{ runner.os }}-
- name: Build core
run: npm run build:core
- name: Build web host
run: npm run build:web-host
- name: Stamp next version and sync platform packages
id: stamp
env:
VERSION_CHANNEL: next
run: |
npm run pipeline:version-stamp
npm run sync-platform-versions
echo "version=$(node -e 'process.stdout.write(require("./package.json").version)')" >> "$GITHUB_OUTPUT"
- name: Smoke test
run: |
chmod +x dist/loader.js
export SF_SMOKE_BINARY="$(pwd)/dist/loader.js"
npm run test:smoke
- name: Publish @next
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
VERSION=$(node -e 'process.stdout.write(require("./package.json").version)')
if npm view "singularity-forge@${VERSION}" version 2>/dev/null; then
echo "Version ${VERSION} already published — moving @next tag"
npm dist-tag add "singularity-forge@${VERSION}" next
else
npm publish --tag next
fi
next-verify:
name: Next Verify (installed package)
needs: next-publish
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.inputs.ref }}
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install published singularity-forge@next globally (with registry propagation retry)
env:
NEXT_VERSION: ${{ needs.next-publish.outputs.next-version }}
run: |
for i in 1 2 3 4 5 6; do
npm install -g "singularity-forge@${NEXT_VERSION}" && exit 0
echo "Attempt $i failed — waiting 10s for npm registry propagation..."
sleep 10
done
echo "::error::Failed to install singularity-forge@${NEXT_VERSION} after 6 attempts. The @next tag may point at a broken artifact — deprecate it with: npm deprecate singularity-forge@${NEXT_VERSION} 'broken build'"
exit 1
- name: Run smoke tests (against installed binary)
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:smoke
- name: Install repo dependencies (for regression harness)
run: npm ci
- name: Run live regression tests (against installed binary)
run: |
export SF_SMOKE_BINARY=$(which sf)
npm run test:live-regression
- name: Warn on verify failure
if: failure()
env:
NEXT_VERSION: ${{ needs.next-publish.outputs.next-version }}
run: |
echo "::error::Post-publish verification failed for singularity-forge@${NEXT_VERSION}. The @next tag still points at this version on npm."
echo "::error::Recommended actions: (1) investigate the failing step above, (2) deprecate the broken version with 'npm deprecate singularity-forge@${NEXT_VERSION} \"broken build; see Actions run\"', (3) cut a fix and re-run Next Publish."
exit 1

177
.github/workflows/prod-release.yml vendored Normal file
View file

@ -0,0 +1,177 @@
name: Prod Release
# Manual prod release. Click "Run workflow" in the Actions tab to cut @latest
# from main. Gated by the `prod` GitHub Environment approval before any
# publishing or commit-push side effects run.
on:
workflow_dispatch: {}
concurrency:
group: prod-release
cancel-in-progress: false
permissions:
contents: write
packages: write
pull-requests: write
jobs:
prod-release:
name: Production Release
runs-on: ubuntu-latest
environment: prod
steps:
- uses: actions/checkout@v6
with:
ref: main
fetch-depth: 0
token: ${{ secrets.RELEASE_PAT }}
- uses: actions/setup-node@v6
with:
node-version: 22
registry-url: https://registry.npmjs.org
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Cache Next.js build
uses: actions/cache@v4
with:
path: web/.next/cache
key: nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-${{ hashFiles('web/app/**', 'web/components/**', 'web/lib/**', 'web/hooks/**') }}
restore-keys: |
nextjs-${{ runner.os }}-${{ hashFiles('web/package-lock.json') }}-
nextjs-${{ runner.os }}-
- name: Run live LLM tests (optional)
continue-on-error: true
run: npm run test:live || echo "::warning::Live LLM tests failed — non-blocking, but worth investigating"
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SF_LIVE_TESTS: "1"
- name: Generate changelog and determine version
id: release
run: |
OUTPUT=$(node scripts/generate-changelog.mjs)
echo "$OUTPUT" | jq .
echo "version=$(echo "$OUTPUT" | jq -r '.newVersion')" >> "$GITHUB_OUTPUT"
echo "$OUTPUT" | jq -r '.changelogEntry' > /tmp/changelog-entry.md
echo "$OUTPUT" | jq -r '.releaseNotes' > /tmp/release-notes.md
- name: Bump version and sync packages
env:
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: node scripts/bump-version.mjs "$RELEASE_VERSION"
- name: Validate package files after version bump
run: |
node -e "require('./package.json')" && \
node -e "require('./packages/pi-coding-agent/package.json')" && \
node -e "require('./pkg/package.json')" && \
echo "All package.json files are valid"
- name: Update CHANGELOG.md
run: node scripts/update-changelog.mjs /tmp/changelog-entry.md
- name: Commit and tag release
env:
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add package.json package-lock.json web/package-lock.json CHANGELOG.md rust-engine/npm/*/package.json pkg/package.json packages/*/package.json
git commit -m "release: v${RELEASE_VERSION}"
git pull --rebase origin main
git tag "v${RELEASE_VERSION}"
- name: Build release
run: npm run build
- name: Publish release to npm @latest
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
OUTPUT=$(npm publish 2>&1) && echo "$OUTPUT" || {
if echo "$OUTPUT" | grep -q "cannot publish over the previously published"; then
echo "Version already published — promoting to latest"
npm dist-tag add "singularity-forge@${RELEASE_VERSION}" latest
else
echo "$OUTPUT"
exit 1
fi
}
- name: Push release commit and tag
env:
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
git push origin main
git push origin "v${RELEASE_VERSION}"
- name: Create GitHub Release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
gh release create "v${RELEASE_VERSION}" \
--title "v${RELEASE_VERSION}" \
--notes-file /tmp/release-notes.md \
--latest
- name: Post to Discord
  # FIX: a step's own `env:` block is NOT visible in that step's `if:`
  # expression, so the previous guard `env.DISCORD_WEBHOOK != ''` always
  # compared an unset value and the step never ran. Check the secret
  # directly (the `secrets` context IS available in step-level `if`).
  if: ${{ secrets.DISCORD_CHANGELOG_WEBHOOK != '' }}
  env:
    DISCORD_WEBHOOK: ${{ secrets.DISCORD_CHANGELOG_WEBHOOK }}
    RELEASE_VERSION: ${{ steps.release.outputs.version }}
  run: |
    # FIX: build the payload inside jq so "\n" is a real newline. In a
    # double-quoted shell string "\n" is a literal backslash-n, which jq
    # --arg then escaped and Discord rendered verbatim.
    jq -n --arg v "$RELEASE_VERSION" --rawfile notes /tmp/release-notes.md \
      '{content: ("**SF v\($v) Released**\n\n" + $notes + "\n\n`npm i singularity-forge@\($v)`")}' \
      | curl -s -X POST "$DISCORD_WEBHOOK" -H "Content-Type: application/json" -d @-
# Docker publish disabled — no ghcr.io package configured yet
# - name: Log in to GHCR
# uses: docker/login-action@v4
# with:
# registry: ghcr.io
# username: ${{ github.actor }}
# password: ${{ secrets.GITHUB_TOKEN }}
#
# - name: Build and push release Docker image
# env:
# RELEASE_VERSION: ${{ steps.release.outputs.version }}
# run: |
# docker build --target runtime \
# -t ghcr.io/singularity-ng/singularity-forge:latest \
# -t "ghcr.io/singularity-ng/singularity-forge:${RELEASE_VERSION}" \
# .
# docker push "ghcr.io/singularity-ng/singularity-forge:${RELEASE_VERSION}"
# docker push ghcr.io/singularity-ng/singularity-forge:latest
- name: Open back-merge PR main→next if behind
env:
GH_TOKEN: ${{ secrets.RELEASE_PAT }}
RELEASE_VERSION: ${{ steps.release.outputs.version }}
run: |
if ! git ls-remote --exit-code --heads origin next >/dev/null 2>&1; then
echo "next branch does not exist yet; skipping back-merge"
exit 0
fi
git fetch origin next main
BEHIND=$(git rev-list --count origin/next..origin/main)
if [ "$BEHIND" -gt 0 ]; then
BRANCH="backmerge/main-to-next-v${RELEASE_VERSION}"
git checkout -B "$BRANCH" origin/main
git push origin "$BRANCH" --force-with-lease
gh pr create --base next --head "$BRANCH" \
--title "chore: back-merge main to next (v${RELEASE_VERSION})" \
--body "Sync release commit and version bump from main into next." || true
else
echo "next is up to date with main; no back-merge needed"
fi

111
.github/workflows/version-check.yml vendored Normal file
View file

@ -0,0 +1,111 @@
name: Version Check
on:
issues:
types: [opened, edited]
permissions:
issues: write
jobs:
check-version:
if: ${{ github.event_name == 'issues' && contains(github.event.issue.body, 'SF version') }}
runs-on: ubuntu-latest
steps:
- name: Check SF version and comment if outdated
uses: actions/github-script@v7
with:
script: |
const body = context.payload.issue.body || '';
const issueNumber = context.payload.issue.number;
const match = body.match(/###\s+SF version\s*\n+\s*([^\s\n]+)/i);
if (!match) {
core.info('Could not find a SF version value in the issue body - skipping.');
return;
}
const reportedVersion = match[1].trim().replace(/^v/, '');
core.info('Reported version: ' + reportedVersion);
const npmResponse = await fetch('https://registry.npmjs.org/singularity-forge/latest');
if (!npmResponse.ok) {
core.setFailed('npm registry request failed: ' + npmResponse.status);
return;
}
const npmData = await npmResponse.json();
const latestVersion = npmData.version;
core.info('Latest version: ' + latestVersion);
function parseVersion(v) {
const parts = v.replace(/^v/, '').split('.').map(Number);
return [parts[0] || 0, parts[1] || 0, parts[2] || 0];
}
function isOutdated(reported, latest) {
const r = parseVersion(reported);
const l = parseVersion(latest);
if (r[0] !== l[0]) return r[0] < l[0];
if (r[1] !== l[1]) return r[1] < l[1];
return r[2] < l[2];
}
if (!isOutdated(reportedVersion, latestVersion)) {
core.info('Version ' + reportedVersion + ' is current - no comment needed.');
return;
}
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
});
const botMarker = '<!-- sf-version-check -->';
const alreadyCommented = comments.data.some(function (c) {
return c.user.type === 'Bot' && c.body.indexOf(botMarker) !== -1;
});
if (alreadyCommented) {
core.info('Version check comment already posted - skipping duplicate.');
return;
}
const lines = [
botMarker,
'',
'Thanks for filing this bug report!',
'',
'It looks like you are running **SF v' + reportedVersion + '**, but the latest release is **v' + latestVersion + '**.',
'',
'Before we investigate further, please upgrade and check whether the issue still occurs:',
'',
'```bash',
'npm install -g singularity-forge@latest',
'sf --version # should print ' + latestVersion,
'```',
'',
'Then re-run your reproduction steps. If the problem persists on **v' + latestVersion + '**, please update the **SF version** field in this issue and let us know.',
'',
'> **Why?** Many bugs are fixed in subsequent releases. Confirming on the latest version keeps the team focused on real, current issues.',
'',
'---',
'*This is an automated check. If you are intentionally pinned to an older version, feel free to explain why and we will continue from there.*',
];
const comment = lines.join('\n');
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
body: comment,
});
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['needs-upgrade'],
});
core.info('Posted upgrade prompt for v' + reportedVersion + ' -> v' + latestVersion);

19
.siftignore Normal file
View file

@ -0,0 +1,19 @@
.git/**
.sf/**
.bg-shell/**
.pytest_cache/**
.venv/**
venv/**
node_modules/**
**/node_modules/**
**/__pycache__/**
*.pyc
*.egg-info/**
build/**
dist/**
target/**
vendor/**
coverage/**
.cache/**
tmp/**
*.log

1
0 Normal file
View file

@ -0,0 +1 @@
.sf/CODEBASE.md:2

20
ARCHITECTURE.md Normal file
View file

@ -0,0 +1,20 @@
# Architecture
This file is the short map of the codebase. Keep it current and compact.
## Purpose
Describe the product, its users, and the job this repository exists to do.
## Codemap
- `src/`: primary implementation.
- `tests/`: behavior and regression coverage.
- `docs/`: durable product, design, plan, reliability, and security context.
## Invariants
- Prefer small, named modules with clear ownership.
- Behavior changes need tests or an explicit eval.
- Keep generated artifacts out of hand-written design docs.
- Update this map when new top-level concepts or directories become important.

1
TODO.md Normal file
View file

@ -0,0 +1 @@
# Raw Dump Inbox

## Eval Candidates

1. Test note for CI mode verification

3
docs/DESIGN.md Normal file
View file

@ -0,0 +1,3 @@
# Design
Record interaction patterns, visual constraints, and design-system usage here.

3
docs/FRONTEND.md Normal file
View file

@ -0,0 +1,3 @@
# Frontend
Record frontend architecture, component ownership, accessibility constraints, and browser support here.

3
docs/PLANS.md Normal file
View file

@ -0,0 +1,3 @@
# Plans
Use this as the index for current and upcoming work. Link detailed plans in `docs/exec-plans/`.

3
docs/PRODUCT_SENSE.md Normal file
View file

@ -0,0 +1,3 @@
# Product Sense
Capture user goals, non-goals, tradeoffs, and examples of good product judgment for this repo.

10
docs/QUALITY_SCORE.md Normal file
View file

@ -0,0 +1,10 @@
# Quality Score
Define what good looks like for this repo. Include fast checks, slow checks, evals, and known blind spots.
Use these principles:
- Make code legible to agents with semantic names and explicit boundaries.
- Prefer small, testable modules over files that require broad context to edit.
- Enforce style, architecture, and reliability rules mechanically where possible.
- Keep a cleanup loop for stale docs, generated artifacts, and accumulated implementation debt.

35
docs/RECORDS_KEEPER.md Normal file
View file

@ -0,0 +1,35 @@
# Records Keeper
The records keeper keeps repo memory ordered after meaningful changes. Run this checklist at milestone close, after architecture changes, after product behavior changes, and whenever docs/source disagree.
Use the `records-keeper` skill for this workflow when SF skills are available. Use `context-doctor` instead when stale state lives under `.sf/` or the memory store.
## Canonical Homes
- Root `AGENTS.md`: short routing map for agents.
- `ARCHITECTURE.md`: short system map, boundaries, invariants, critical flows, and verification.
- `docs/product-specs/`: durable user-facing behavior and product decisions.
- `docs/design-docs/`: durable design and architecture decisions.
- `docs/exec-plans/`: active/completed work plans and technical debt.
- `docs/generated/`: generated references only.
- `docs/records/`: audits, ledgers, and context-gardening outputs.
## Checklist
- Root map is current: `AGENTS.md` points to the right canonical docs and local `AGENTS.md` files.
- Architecture is current: new subsystems, boundaries, invariants, data/state, or critical flows are reflected in `ARCHITECTURE.md`.
- Product specs are current: user-visible behavior changes are reflected in `docs/product-specs/`.
- Execution plans are filed: active work is in `docs/exec-plans/active/`; completed summaries and evidence are in `docs/exec-plans/completed/`.
- Debt is visible: discovered cleanup is listed in `docs/exec-plans/tech-debt-tracker.md`.
- Generated docs are marked: generated material stays under `docs/generated/` or clearly says how to regenerate it.
- Contradictions are resolved: stale docs are updated or marked superseded with links to the source of truth.
- Verification is recorded: changed checks, evals, and commands are listed in the relevant plan or quality document.
## Output
When records work is non-trivial, write a dated note under `docs/records/` with:
- What changed.
- What canonical docs were updated.
- What contradictions were found.
- What remains unresolved.

3
docs/RELIABILITY.md Normal file
View file

@ -0,0 +1,3 @@
# Reliability
Document expected failure modes, recovery paths, observability, and release checks here.

3
docs/SECURITY.md Normal file
View file

@ -0,0 +1,3 @@
# Security
Document trust boundaries, secrets handling, dependency risk, and security review requirements here.

View file

@ -0,0 +1,5 @@
# Core Beliefs
- The repo should explain itself to humans and agents.
- Plans should carry acceptance criteria, falsifiers, and verification commands.
- Architecture should be mechanically checkable where possible.

View file

@ -0,0 +1,3 @@
# Design Docs
Durable design decisions live here. Link active proposals, completed decisions, and rejected alternatives.

View file

@ -0,0 +1,3 @@
# Active Execution Plans
Link active plans here. Each plan should state purpose, scope, tasks, acceptance criteria, and verification.

View file

@ -0,0 +1,3 @@
# Completed Execution Plans
Move finished plan summaries here with evidence links and follow-up debt.

View file

@ -0,0 +1,3 @@
# Tech Debt Tracker
Track cleanup discovered during implementation. Include owner, impact, proposed fix, and verification.

View file

@ -0,0 +1,3 @@
# Database Schema
Generated or refreshed schema notes belong here. Do not hand-maintain stale schema copies.

View file

@ -0,0 +1,3 @@
# Product Specs
Durable user-facing behavior, workflows, and product decisions live here.

View file

@ -0,0 +1,3 @@
# New User Onboarding
Describe the first-run experience, success criteria, and failure states when this product has an onboarding flow.

3
docs/records/index.md Normal file
View file

@ -0,0 +1,3 @@
# Records
This folder holds repo-memory audits, decision ledgers, context-gardening notes, and records-keeper outputs.

View file

@ -0,0 +1 @@
Reference slot for design-system guidance intended for LLM consumption.

View file

@ -0,0 +1 @@
Reference slot for Nixpacks deployment/build guidance intended for LLM consumption.

View file

@ -0,0 +1 @@
Reference slot for uv/Python tooling guidance intended for LLM consumption.

View file

@ -0,0 +1,83 @@
import { shortHash } from "../utils/hash.js";
const MAX_TOOL_ARGUMENT_KEY_LENGTH = 256;
const LONG_KEY_PREFIX = "tool_arg_";
/**
 * Type guard: true only for plain objects — non-null, non-array values
 * whose runtime type is "object".
 */
function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Truncate `base` to at most `maxLength` characters; shorter strings are
 * returned unchanged.
 */
function clampKey(base: string, maxLength: number): string {
  if (base.length > maxLength) {
    return base.slice(0, maxLength);
  }
  return base;
}
/**
 * Append "_<n>" suffixes until `base` no longer collides with an entry in
 * `usedKeys`, keeping the result within `maxLength`.
 *
 * FIX: the suffix is appended AFTER clamping the base, reserving room for
 * it. The previous inline loops computed `clampKey(`${base}_${n}`, maxLength)`,
 * which truncated the suffix away entirely when `base.length === maxLength`,
 * yielding the same candidate on every iteration and looping forever.
 */
function resolveKeyCollision(
  base: string,
  maxLength: number,
  usedKeys: Set<string>,
): string {
  let candidate = clampKey(base, maxLength);
  let counter = 0;
  while (usedKeys.has(candidate)) {
    const suffix = `_${counter}`;
    // Always keep at least one character of the base so candidates grow
    // distinct as the counter widens; the loop therefore terminates.
    const room = Math.max(1, maxLength - suffix.length);
    candidate = clampKey(base, room) + suffix;
    counter += 1;
  }
  return candidate;
}

/**
 * Produce a serialization-safe key for a tool-call argument.
 *
 * - Keys within `maxLength` that are not already taken pass through as-is.
 * - Otherwise the key is replaced by a stable hash-derived alias
 *   (`tool_arg_<hash>`), disambiguated with numeric suffixes on collision.
 * - `seen` caches key -> alias so the same long key maps consistently
 *   across repeated occurrences.
 *
 * This consolidates the three near-identical collision loops of the
 * previous implementation into {@link resolveKeyCollision} and fixes its
 * potential infinite loop (see above).
 */
function makeSafeKey(
  key: string,
  maxLength: number,
  usedKeys: Set<string>,
  seen: Map<string, string>,
): string {
  if (key.length <= maxLength && !usedKeys.has(key)) {
    return key;
  }
  // Prefer a previously assigned alias for this key; otherwise derive a
  // fresh hash-based base name.
  const base =
    seen.get(key) ?? clampKey(`${LONG_KEY_PREFIX}${shortHash(key)}`, maxLength);
  const candidate = resolveKeyCollision(base, maxLength, usedKeys);
  seen.set(key, candidate);
  return candidate;
}
/**
 * Recursively rewrite tool-call argument keys so every object key is
 * serialization-safe: bounded to `maxKeyLength` and unique within its
 * object. Arrays are mapped element-wise; primitives pass through untouched.
 */
export function sanitizeToolCallArgumentsForSerialization(
  args: unknown,
  maxKeyLength = MAX_TOOL_ARGUMENT_KEY_LENGTH,
): unknown {
  if (Array.isArray(args)) {
    return args.map((item) =>
      sanitizeToolCallArgumentsForSerialization(item, maxKeyLength),
    );
  }
  if (!isObject(args)) {
    return args;
  }
  const result: Record<string, unknown> = {};
  const taken = new Set<string>();
  const aliases = new Map<string, string>();
  for (const [rawKey, rawValue] of Object.entries(args)) {
    const safeKey = makeSafeKey(rawKey, maxKeyLength, taken, aliases);
    taken.add(safeKey);
    result[safeKey] = sanitizeToolCallArgumentsForSerialization(
      rawValue,
      maxKeyLength,
    );
  }
  return result;
}

Binary file not shown.

146
src/errors.ts Normal file
View file

@ -0,0 +1,146 @@
/**
* errors.ts Structured error types for consistent, actionable CLI diagnostics.
*
* Purpose: every error path in the CLI and headless orchestrator should be
* able to emit context that helps users (and future debuggers) understand
* *what* failed, *where*, and *what to try next* without depending on
* heavy error-handling libraries.
*
* Consumer: cli.ts, headless.ts, and any extension that surfaces user-facing
* failures. The types are plain data so they serialize cleanly to stderr,
* JSON batch output, and trace spans.
*/
// ---------------------------------------------------------------------------
// Core structured error type
// ---------------------------------------------------------------------------
/**
* A user-facing or machine-readable error record with rich context.
*
* All fields are optional except `message` so that call-sites can incrementally
* adopt structured errors without rewriting every catch block at once.
*/
export interface StructuredError {
/** Human-readable description of what went wrong. The only required field,
 * so catch sites can adopt structured errors incrementally. */
message: string;
/** The high-level operation that was in progress when the error occurred,
* e.g. "graph build", "session resume", "model validation". */
operation?: string;
/** The file path most relevant to the failure (the file being read,
* written, or expected). */
file?: string;
/** The line number inside `file` if known (e.g. from a parser error). */
line?: number;
/** Actionable guidance for the user — what to check or try next. */
guidance?: string;
/** Whether retrying the same operation (with the same inputs) might
* succeed, e.g. transient network failures. */
retry?: boolean;
/** The underlying cause, if this error wraps another. Kept as `unknown`
* so callers aren't forced to coerce to Error. */
cause?: unknown;
}
// ---------------------------------------------------------------------------
// Convenience constructors
// ---------------------------------------------------------------------------
/**
* Create a {@link StructuredError} from a message and optional context.
*
* Purpose: reduce boilerplate at catch sites where we want to enrich a raw
* exception with operation/file context before logging or returning it.
*
* Consumer: cli.ts catch blocks, headless.ts event handlers.
*/
/**
 * Build a {@link StructuredError} from a message plus optional context.
 *
 * Keeps catch-site boilerplate low: wrap a raw exception with
 * operation/file details before logging or returning it.
 */
export function error(
  message: string,
  ctx?: Omit<StructuredError, "message">,
): StructuredError {
  const result: StructuredError = { message };
  if (ctx !== undefined) {
    Object.assign(result, ctx);
  }
  return result;
}
// ---------------------------------------------------------------------------
// Formatters
// ---------------------------------------------------------------------------
/**
* Format a {@link StructuredError} as plain text suitable for stderr.
*
* Output shape (fields omitted when undefined):
* [sf] Error: <message>
* Operation: <operation>
* File: <file>:<line>
* Guidance: <guidance>
* Retryable: yes|no
*/
/**
 * Render a {@link StructuredError} as human-readable text for stderr.
 *
 * Output shape (undefined fields are skipped):
 *   [sf] Error: <message>
 *    Operation: <operation>
 *    File: <file>:<line>
 *    Guidance: <guidance>
 *    Retryable: yes|no
 */
export function formatStructuredError(
  err: StructuredError,
  prefix = "[sf]",
): string {
  let text = `${prefix} Error: ${err.message}`;
  if (err.operation) {
    text += `\n Operation: ${err.operation}`;
  }
  if (err.file) {
    const lineSuffix = err.line === undefined ? "" : `:${err.line}`;
    text += `\n File: ${err.file}${lineSuffix}`;
  }
  if (err.guidance) {
    text += `\n Guidance: ${err.guidance}`;
  }
  if (err.retry !== undefined) {
    text += `\n Retryable: ${err.retry ? "yes" : "no"}`;
  }
  return `${text}\n`;
}
/**
* Format a {@link StructuredError} as a JSON object.
*
* Purpose: headless --output-format json mode can embed structured errors
* in the result payload instead of interleaving free-form text on stderr.
*/
/**
 * Serialize a {@link StructuredError} to a plain JSON-safe object.
 *
 * Undefined fields are omitted. A wrapped Error cause is reduced to its
 * name and message; any other cause value is stringified.
 */
export function errorToJson(err: StructuredError): Record<string, unknown> {
  const out: Record<string, unknown> = { message: err.message };
  for (const field of ["operation", "file", "line", "guidance", "retry"] as const) {
    const value = err[field];
    if (value !== undefined) {
      out[field] = value;
    }
  }
  if (err.cause !== undefined) {
    if (err.cause instanceof Error) {
      out.cause = { message: err.cause.message, name: err.cause.name };
    } else {
      out.cause = String(err.cause);
    }
  }
  return out;
}
// ---------------------------------------------------------------------------
// Predicates
// ---------------------------------------------------------------------------
/**
 * Narrow an `unknown` value to a {@link StructuredError}.
 *
 * Purpose: safe type guard at catch boundaries where the thrown value may
 * be a plain Error, a StructuredError, or something else entirely. The only
 * runtime requirement checked is a string `message` property on a non-null
 * object — matching the one required field of the interface.
 */
export function isStructuredError(val: unknown): val is StructuredError {
  if (typeof val !== "object" || val === null) return false;
  const candidate = val as Record<string, unknown>;
  return typeof candidate.message === "string";
}

View file

@ -1046,6 +1046,10 @@ export async function postUnitPreVerification(
s.basePath,
expectedOutput,
plannedFiles,
{
source: s.stagedPendingCommit ? "staged" : "last-commit",
baselineFiles: s.preUnitDirtyFiles,
},
);
if (audit && audit.violations.length > 0) {
const warnings = audit.violations.filter(

View file

@ -51,6 +51,7 @@ import {
readProductionMutationApprovalStatus,
} from "../production-mutation-approval.js";
import { resetEvidence } from "../safety/evidence-collector.js";
import { getDirtyFiles } from "../safety/file-change-validator.js";
import {
cleanupCheckpoint,
createCheckpoint,
@ -1776,6 +1777,15 @@ export async function runUnitPhase(
if (safetyConfig.enabled && safetyConfig.evidence_collection) {
resetEvidence();
}
if (
safetyConfig.enabled &&
safetyConfig.file_change_validation &&
unitType === "execute-task"
) {
s.preUnitDirtyFiles = getDirtyFiles(s.basePath);
} else {
s.preUnitDirtyFiles = [];
}
// Only checkpoint code-executing units (not lifecycle/planning units)
if (
safetyConfig.enabled &&
@ -2320,6 +2330,7 @@ export async function runUnitPhase(
}
s.checkpointSha = null;
}
s.preUnitDirtyFiles = [];
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt } };
}

View file

@ -186,6 +186,8 @@ export class AutoSession {
// ── Safety harness ───────────────────────────────────────────────────────
/** SHA of the pre-unit git checkpoint ref. Cleared on success or rollback. */
checkpointSha: string | null = null;
/** Dirty files captured before the current execute-task unit starts. */
preUnitDirtyFiles: string[] = [];
// ── Deferred commit (Fix 1) ──────────────────────────────────────────────
/**
@ -326,6 +328,7 @@ export class AutoSession {
this.isolationDegraded = false;
this.milestoneMergedInPhases = false;
this.checkpointSha = null;
this.preUnitDirtyFiles = [];
this.stagedPendingCommit = false;
this.pendingCommitTaskContext = null;

View file

@ -0,0 +1,186 @@
/**
* Doc Scaffold Checker validates the agentic docs scaffold is filled in.
*
* Purpose: Mechanical enforcement of harness-engineering principles. After
* bootstrap or milestone close, check that scaffold files contain real content
* beyond the template stubs. Reports findings so the agent knows what needs
 * attention — never blocks, only surfaces.
*
* Consumer: bootstrapProject (after scaffold init), milestone close workflows.
*/
import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
import { join } from "node:path";
export interface ScaffoldCheck {
file: string; // relative path from repo root
status: "ok" | "empty" | "stub" | "missing";
lines: number;
note: string;
}
export interface DocScaffoldReport {
checkedAt: string;
repoRoot: string;
checks: ScaffoldCheck[];
summary: {
total: number;
ok: number;
empty: number;
stub: number;
missing: number;
};
}
/** Files created by ensureAgenticDocsScaffold that should contain real content. */
const SCAFFOLD_FILES = [
// Root routing
"AGENTS.md",
"ARCHITECTURE.md",
// docs/ structure
"docs/AGENTS.md",
"docs/PLANS.md",
"docs/DESIGN.md",
"docs/FRONTEND.md",
"docs/QUALITY_SCORE.md",
"docs/RELIABILITY.md",
"docs/SECURITY.md",
"docs/product-specs/index.md",
"docs/product-specs/new-user-onboarding.md",
"docs/design-docs/index.md",
"docs/design-docs/core-beliefs.md",
"docs/exec-plans/active/index.md",
"docs/exec-plans/completed/index.md",
"docs/exec-plans/tech-debt-tracker.md",
"docs/exec-plans/AGENTS.md",
"docs/records/index.md",
"docs/records/AGENTS.md",
"docs/RECORDS_KEEPER.md",
// src/ and tests/ routing
"src/AGENTS.md",
"tests/AGENTS.md",
] as const;
// Minimum lines considered "real content" vs stub. Template stubs are ~3-8 lines.
const STUB_LINE_COUNT = 10;
// Files that are allowed to stay as stubs (index/placeholder files)
const STUB_ALLOWED = new Set([
"docs/product-specs/index.md",
"docs/design-docs/index.md",
"docs/exec-plans/active/index.md",
"docs/exec-plans/completed/index.md",
"docs/records/index.md",
]);
// Count the lines that carry real content: non-blank and not prefixed with a
// comment/heading marker ("//" or "#"). Markdown headings are deliberately
// excluded — a file of headings alone is still a stub.
function countContentLines(content: string): number {
  let count = 0;
  for (const raw of content.split("\n")) {
    const line = raw.trim();
    if (line.length > 0 && !line.startsWith("//") && !line.startsWith("#")) {
      count += 1;
    }
  }
  return count;
}
// Classify a single scaffold file as ok / empty / stub / missing.
// Never throws: unreadable paths are downgraded to "stub" findings so the
// overall report always completes.
function checkFile(repoRoot: string, relPath: string): ScaffoldCheck {
  const fullPath = join(repoRoot, relPath);
  if (!existsSync(fullPath)) {
    return {
      file: relPath,
      status: "missing",
      lines: 0,
      note: "File does not exist — scaffold not run or was interrupted",
    };
  }
  let content: string;
  try {
    if (statSync(fullPath).isDirectory()) {
      return { file: relPath, status: "stub", lines: 0, note: "Is a directory, expected a file" };
    }
    content = readFileSync(fullPath, "utf-8");
  } catch {
    return { file: relPath, status: "stub", lines: 0, note: "Could not read file" };
  }
  // Non-blank line count is reported; content lines (excluding comments and
  // headings) decide stub-vs-ok.
  const nonBlankLines = content.split("\n").filter((l) => l.trim().length > 0).length;
  if (nonBlankLines === 0) {
    return { file: relPath, status: "empty", lines: 0, note: "File is empty" };
  }
  const contentLines = countContentLines(content);
  if (contentLines < STUB_LINE_COUNT) {
    // Index/placeholder files are allowed to remain stubs.
    const allowed = STUB_ALLOWED.has(relPath);
    return {
      file: relPath,
      status: allowed ? "ok" : "stub",
      lines: nonBlankLines,
      note: allowed
        ? `Stub file (${nonBlankLines} lines) — acceptable for index/placeholder`
        : `Stub file (${nonBlankLines} lines) — needs real content beyond template`,
    };
  }
  return {
    file: relPath,
    status: "ok",
    lines: nonBlankLines,
    note: `Contains ${contentLines} content lines`,
  };
}
/**
* Check all scaffold files in a repo. Returns a structured report.
 * Never throws — all errors are caught and reported as stub/missing.
*/
export function checkDocsScaffold(repoRoot: string): DocScaffoldReport {
  // One check per scaffold file, in declaration order.
  const checks: ScaffoldCheck[] = SCAFFOLD_FILES.map((file) =>
    checkFile(repoRoot, file),
  );
  const tally = (status: ScaffoldCheck["status"]): number =>
    checks.filter((c) => c.status === status).length;
  return {
    checkedAt: new Date().toISOString(),
    repoRoot,
    checks,
    summary: {
      total: checks.length,
      ok: tally("ok"),
      empty: tally("empty"),
      stub: tally("stub"),
      missing: tally("missing"),
    },
  };
}
/**
 * Format a report as human-readable text for logging to stderr.
 *
 * Output shape:
 *   [doc-checker] Scaffold check — <timestamp>
 *     <ok>/<total> files OK · <n> need content · <n> missing · <n> empty
 *     Files needing attention:
 *       [<status>] <file> — <note>
 */
export function formatDocCheckReport(report: DocScaffoldReport): string {
  const lines: string[] = [];
  lines.push(`[doc-checker] Scaffold check — ${report.checkedAt}`);
  lines.push(
    `  ${report.summary.ok}/${report.summary.total} files OK`
      + (report.summary.stub > 0 ? ` · ${report.summary.stub} need content` : "")
      + (report.summary.missing > 0 ? ` · ${report.summary.missing} missing` : "")
      + (report.summary.empty > 0 ? ` · ${report.summary.empty} empty` : ""),
  );
  const issues = report.checks.filter((c) => c.status !== "ok");
  if (issues.length > 0) {
    lines.push("  Files needing attention:");
    for (const issue of issues) {
      // Bug fix: file and note were previously concatenated with no
      // separator, producing e.g. "docs/DESIGN.mdStub file (3 lines)…".
      lines.push(`    [${issue.status}] ${issue.file} — ${issue.note}`);
    }
  } else {
    lines.push("  All scaffold files contain real content.");
  }
  return lines.join("\n");
}

View file

@ -24,15 +24,19 @@ All relevant context has been preloaded below — the roadmap, all slice summari
### Step 1 — Dispatch Parallel Reviewers
Call `subagent` with `tasks: [...]` containing ALL THREE reviewers simultaneously:
Call `subagent` with `tasks: [...]` containing ALL THREE reviewers simultaneously.
Use `agent: "reviewer"` for every validation reviewer. Do not use `code`, `coder`, or `worker` here — this is review/validation work, not implementation.
**Reviewer A — Requirements Coverage**
Agent: `reviewer`
Prompt: "Review milestone {{milestoneId}} requirements coverage. Working directory: {{workingDirectory}}. Read `.sf/{{milestoneId}}/REQUIREMENTS.md` (or equivalent requirements file). For each requirement, check the slice SUMMARY files in `.sf/{{milestoneId}}/` to determine if it is: COVERED (clearly demonstrated), PARTIAL (mentioned but not fully demonstrated), or MISSING (no evidence). Output a markdown table with columns: Requirement | Status | Evidence. End with a one-line verdict: PASS if all covered, NEEDS-ATTENTION if partials exist, FAIL if any missing."
**Reviewer B — Cross-Slice Integration**
Agent: `reviewer`
Prompt: "Review milestone {{milestoneId}} cross-slice integration. Working directory: {{workingDirectory}}. Read `{{roadmapPath}}` and find the boundary map (produces/consumes contracts). For each boundary, check that the producing slice's SUMMARY confirms it produced the artifact, and the consuming slice's SUMMARY confirms it consumed it. Output a markdown table: Boundary | Producer Summary | Consumer Summary | Status. End with a one-line verdict: PASS if all boundaries honored, NEEDS-ATTENTION if any gaps."
**Reviewer C — Assessment & Acceptance Criteria**
Agent: `reviewer`
Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.sf/{{milestoneId}}/CONTEXT.md` for acceptance criteria. Check for ASSESSMENT files in each slice directory. Verify each acceptance criterion maps to either a passing assessment result or clear SUMMARY evidence. Then review the inlined milestone verification classes from planning. For each non-empty planned class, output a markdown table: Class | Planned Check | Evidence | Verdict. Use the exact class names `Contract`, `Integration`, `Operational`, and `UAT` whenever those classes are present. If no verification classes were planned, say that explicitly. Output two sections: `Acceptance Criteria` with a checklist `[ ] Criterion | Evidence`, and `Verification Classes` with the table. End with a one-line verdict: PASS if all criteria and verification classes are covered, NEEDS-ATTENTION if gaps exist."
### Step 2 — Synthesize Findings

View file

@ -1,10 +1,11 @@
/**
* Post-unit file change validator for auto-mode safety harness.
* Compares actual git diff against the task plan's expected output files.
* Compares actual file changes against the task plan's expected output files.
*
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
* Compares against git diff HEAD~1 --name-only after auto-commit.
* Defaults to git diff HEAD~1 --name-only after auto-commit. Deferred-commit
* flows can instead validate the staged diff before the commit is created.
*
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
*/
@ -26,9 +27,15 @@ export interface FileChangeAudit {
actualFiles: string[];
unexpectedFiles: string[];
missingFiles: string[];
ignoredBaselineFiles: string[];
violations: FileViolation[];
}
export interface FileChangeValidationOptions {
source?: "last-commit" | "staged";
baselineFiles?: readonly string[];
}
// ─── Public API ─────────────────────────────────────────────────────────────
/**
@ -43,34 +50,45 @@ export function validateFileChanges(
basePath: string,
expectedOutput: string[],
plannedFiles: string[],
options: FileChangeValidationOptions = {},
): FileChangeAudit | null {
const allExpected = new Set([...expectedOutput, ...plannedFiles]);
// If no expected files were planned, skip validation
if (allExpected.size === 0) return null;
// Get actual changed files from last commit
const actualFiles = getChangedFilesFromLastCommit(basePath);
const source = options.source ?? "last-commit";
const actualFiles =
source === "staged"
? getChangedFilesFromStagedDiff(basePath)
: getChangedFilesFromLastCommit(basePath);
if (!actualFiles) return null;
// Filter out .sf/ internal files — only validate project source files
const projectFiles = actualFiles.filter(
(f) => !f.startsWith(".sf/") && !f.startsWith(".sf\\"),
);
const baselineFiles = new Set(
(options.baselineFiles ?? []).map(normalizeProjectPath),
);
const validationFiles = projectFiles
.map(normalizeProjectPath)
.filter((f) => !baselineFiles.has(f));
const ignoredBaselineFiles = projectFiles
.map(normalizeProjectPath)
.filter((f) => baselineFiles.has(f));
// Normalize expected paths (strip leading ./ or /)
const normalizedExpected = new Set(
[...allExpected].map((f) =>
normalizePlannedFileReference(f).replace(/^\.\//, "").replace(/^\//, ""),
),
[...allExpected].map((f) => normalizeProjectPath(normalizePlannedFileReference(f))),
);
// Compute symmetric difference
const unexpectedFiles = projectFiles.filter(
const unexpectedFiles = validationFiles.filter(
(f) => !normalizedExpected.has(f),
);
const missingFiles = [...normalizedExpected].filter(
(f) => !projectFiles.includes(f),
(f) => !validationFiles.includes(f),
);
const violations: FileViolation[] = [];
@ -93,15 +111,50 @@ export function validateFileChanges(
return {
expectedFiles: [...normalizedExpected],
actualFiles: projectFiles,
actualFiles: validationFiles,
unexpectedFiles,
missingFiles,
ignoredBaselineFiles,
violations,
};
}
/**
 * Capture the dirty-file baseline at unit start. Post-unit validation uses this
 * to avoid warning on files that were already dirty before the task ran.
 *
 * Returns normalized repo-relative paths (leading "./" or "/" stripped). On
 * any git failure the baseline degrades to an empty list — validation then
 * treats every changed file as task-produced, the safe (noisier) direction.
 */
export function getDirtyFiles(basePath: string): string[] {
  try {
    // `--porcelain=v1` yields stable "XY <path>" lines; `--untracked-files=all`
    // lists each untracked file individually instead of collapsing directories.
    const result = execFileSync(
      "git",
      ["status", "--porcelain=v1", "--untracked-files=all"],
      { cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
    ).trim();
    if (!result) return [];
    return result
      .split("\n")
      // Drop the two status letters plus the separating space.
      .map((line) => line.slice(3).trim())
      .map((file) => {
        // Rename entries look like "old -> new"; keep the destination path.
        // NOTE(review): a filename that itself contains " -> " would be
        // truncated here, and git's C-style quoting of special-character
        // paths is not unescaped — likely acceptable for a best-effort
        // baseline, but worth confirming.
        const renamed = file.split(" -> ");
        return renamed[renamed.length - 1] ?? file;
      })
      .filter(Boolean)
      .map(normalizeProjectPath);
  } catch (e) {
    // Surface the failure but never block the unit on baseline capture.
    logWarning(
      "safety",
      `git status failed in file-change-validator: ${(e as Error).message}`,
    );
    return [];
  }
}
// ─── Internals ──────────────────────────────────────────────────────────────
// Strip a leading "./" and then a leading "/" so paths compare as
// repo-relative regardless of how the planner or git spelled them.
function normalizeProjectPath(file: string): string {
  const withoutDotSlash = file.startsWith("./") ? file.slice(2) : file;
  return withoutDotSlash.startsWith("/")
    ? withoutDotSlash.slice(1)
    : withoutDotSlash;
}
function getChangedFilesFromLastCommit(basePath: string): string[] | null {
try {
const result = execFileSync(
@ -118,3 +171,20 @@ function getChangedFilesFromLastCommit(basePath: string): string[] | null {
return null;
}
}
/**
 * List files in the staged (index) diff, normalized to repo-relative paths.
 * Deferred-commit flows use this to validate changes before the commit
 * exists. Returns null — meaning "skip validation" — when git itself fails,
 * as opposed to [] which means "nothing staged".
 */
function getChangedFilesFromStagedDiff(basePath: string): string[] | null {
  try {
    const result = execFileSync(
      "git",
      ["diff", "--name-only", "--cached"],
      { cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
    ).trim();
    return result ? result.split("\n").filter(Boolean).map(normalizeProjectPath) : [];
  } catch (e) {
    logWarning(
      "safety",
      `git staged diff failed in file-change-validator: ${(e as Error).message}`,
    );
    return null;
  }
}

View file

@ -0,0 +1,223 @@
---
name: researcher
description: Researches codebase, project state, and external knowledge using local search, SF database queries, and web search. Use when investigating an unfamiliar subsystem, understanding project milestones and requirements, or gathering evidence for planning. Produces structured research reports.
---
<objective>
Research a topic using four complementary information sources, in priority order:
1. **Serena MCP** (46 LSP-backed tools: symbol search, file read, find references, pattern search) — use FIRST for code exploration
2. **sift** (hybrid BM25+vector local search) — use when Serena symbol search isn't enough
3. **SF project database** (sqlite3) — use for project state (milestones, requirements, decisions)
4. **Web search** — use for external documentation and current information
This skill is the first step before planning — it produces the evidence base that drives good decisions. Without research, agents plan from assumptions; with this skill, they plan from evidence.
</objective>
<quick_start>
**Serena MCP (code intelligence — USE FIRST for code exploration):**
```bash
# Discover Serena tools (46 tools available)
mcp_servers
# Get Serena's full tool list
mcp_discover server=serena
# Symbol search — find where a function/type is defined
mcp_call server=serena tool=find_symbol arguments={contextLines=5,matchPattern="resolveSubagentLaunchSpec"}
# Find all references to a symbol (callers, usages)
mcp_call server=serena tool=find_referencing_symbols arguments={contextLines=3,matchPattern="resolveSubagentLaunchSpec"}
# Read a file (Serena's LSP-backed read is faster than bash cat)
mcp_call server=serena tool=read_file arguments={file_path="src/resources/extensions/subagent/index.ts"}
# Search for pattern in files
mcp_call server=serena tool=search_for_pattern arguments={pattern="call_scout",filePattern="*.ts",contextLines=3}
# List directory
mcp_call server=serena tool=list_dir arguments={path="src/resources/extensions/sf/skills/"}
```
**Local code search — sift (hybrid BM25+vector search):**
```bash
sift search --strategy path-hybrid "authentication middleware"
sift search --strategy hybrid --limit 5 "where is the write gate registered"
```
**SF project database queries:**
```bash
# Current milestone and slices
sqlite3 .sf/sf.db "SELECT id, title, status FROM milestones WHERE status='active'"
# All requirements
sqlite3 .sf/sf.db "SELECT id, class, status, description FROM requirements"
# Recent decisions
sqlite3 .sf/sf.db "SELECT id, scope, decision FROM decisions ORDER BY seq DESC LIMIT 10"
# Tasks by slice
sqlite3 .sf/sf.db "SELECT id, title, status FROM tasks WHERE milestone_id='M001' AND slice_id='S01'"
```
**Web search — use the search-the-web tool directly for current information.**
</quick_start>
<workflow>
## Step 1: Clarify the research goal
Before searching, identify what you need to know:
- **Code exploration** (finding functions, types, references) → use Serena MCP first
- **Project state** (milestones, slices, tasks, requirements) → query the SF DB
- **Current external information** → use web search
- **All of the above** → combine all four sources
## Step 2: Explore code with Serena MCP (priority)
Serena is an LSP-backed code intelligence layer. Use `mcp_call` to invoke its tools:
```bash
# Find where a function or type is defined
mcp_call server=serena tool=find_symbol arguments={matchPattern="MyFunction",contextLines=5}
# Find all callers/references to a symbol
mcp_call server=serena tool=find_referencing_symbols arguments={matchPattern="MyFunction",contextLines=3}
# Read a specific file
mcp_call server=serena tool=read_file arguments={file_path="src/my-file.ts"}
# Grep-like search across the codebase
mcp_call server=serena tool=search_for_pattern arguments={pattern="TODO.*auth",filePattern="*.ts"}
```
## Step 3: Supplement with sift (when Serena isn't enough)
Use sift when you need semantic/hybrid search across unstructured content:
```bash
# Hybrid search for conceptual matches
sift search --strategy hybrid --limit 5 "authentication middleware token validation"
```
## Step 4: Query the SF project database
The SF database (`.sf/sf.db`) contains the canonical project state:
```bash
# List active milestones with their slices
sqlite3 .sf/sf.db "
SELECT m.id, m.title, m.status, s.id, s.title, s.status
FROM milestones m
LEFT JOIN slices s ON s.milestone_id = m.id
WHERE m.status IN ('active','planning')
ORDER BY m.id, s.id
"
# Get requirements by status
sqlite3 .sf/sf.db "SELECT id, class, status, description FROM requirements WHERE status='active'"
# Recent decisions (most recent first)
sqlite3 .sf/sf.db "SELECT id, scope, decision, choice FROM decisions ORDER BY seq DESC LIMIT 20"
# Blocked or pending tasks
sqlite3 .sf/sf.db "SELECT id, title, status FROM tasks WHERE status IN ('blocked','pending')"
# Artifacts (plans, summaries) for a milestone
sqlite3 .sf/sf.db "SELECT path, artifact_type FROM artifacts WHERE milestone_id='M001'"
```
## Step 5: Web search for external information
Use `search-the-web` for documentation, tutorials, or current best practices:
```bash
search_the_web "Next.js 15 app router migration guide"
```
## Step 6: Synthesize into a research report
Write findings to the appropriate artifact:
- Milestone research → `.sf/milestones/{mid}/{mid}-RESEARCH.md`
- Slice research → `.sf/milestones/{mid}/slices/{sid}/{sid}-RESEARCH.md`
- Ad-hoc research → `.sf/research/{topic}.md`
**Research report structure:**
```markdown
# Research: {topic}
## Goal
What question are we answering?
## SF Project State
What does the SF DB say? (milestones, requirements, decisions relevant to this topic)
## Codebase Evidence
What did sift find? (key file:line references)
## External Knowledge
What did web search reveal?
## Findings
Bullet points of the most important discoveries
## Gaps
What is still unknown or needs verification?
## Recommendations
What should the agent do next?
```
</workflow>
<success_criteria>
- Research report written to the correct artifact path
- At least one SF DB query executed and cited
- At least one sift search executed and cited
- Findings are specific (file:line or table:row references), not generic
- Gaps identified honestly — what you could not determine
</success_criteria>
<reference_guides>
### Useful SF DB queries
```sql
-- All milestones with completion status
SELECT id, title, status, completed_at FROM milestones ORDER BY id;
-- All slices for a milestone
SELECT id, title, status, risk FROM slices WHERE milestone_id='M001' ORDER BY sequence;
-- Tasks with verification status
SELECT t.id, t.title, t.status, t.verification_status
FROM tasks t WHERE t.milestone_id='M001' AND t.slice_id='S01';
-- Open requirements
SELECT id, class, description FROM requirements WHERE status IN ('active','pending');
-- Decisions by scope
SELECT id, scope, decision FROM decisions WHERE scope='architecture' ORDER BY seq DESC;
-- Memory entries
SELECT id, category, content FROM memories ORDER BY seq DESC LIMIT 20;
```
### sift strategies
| Strategy | When to use |
|---|---|
| `path-hybrid` | Default. File path + content matching — best for most queries |
| `hybrid` | Pure content matching — when you don't care about file names |
| `page-index-hybrid` | Web-page-like content (documentation) |
| `bm25` | Exact keyword matching — fast fallback |
### DB schema reference
- `milestones` — id, title, status, vision, success_criteria (JSON), completed_at
- `slices` — milestone_id, id, title, status, risk, goal, success_criteria
- `tasks` — milestone_id, slice_id, id, title, status, one_liner, narrative, verification_result
- `requirements` — id, class, status, description, why, source, primary_owner, validation
- `decisions` — seq, id, scope, decision, choice, rationale, revisable, made_by
- `artifacts` — path, artifact_type, milestone_id, slice_id, task_id
- `memories` — id, category, content, confidence
</reference_guides>

View file

@ -48,6 +48,8 @@ const COLLAPSED_ITEM_COUNT = 10;
const liveSubagentProcesses = new Set<ChildProcess>();
const AGENT_ALIASES: Record<string, string> = {
default: "worker",
code: "reviewer",
coder: "typescript-pro",
["g" + "sd-executor"]: "worker",
"sf-worker": "worker",
"sf-scout": "scout",

View file

@ -0,0 +1,34 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import test from "node:test";
import { fileURLToPath } from "node:url";
// Resolve the subagent extension source relative to this test file so the
// assertions below can inspect it as text. These are source-contract tests:
// they pin implementation details of the launcher rather than executing it.
const __dirname = dirname(fileURLToPath(import.meta.url));
const subagentSrc = readFileSync(join(__dirname, "..", "index.ts"), "utf-8");
test("subagent launcher resolves Node command specs instead of shelling through bash", () => {
  // The launcher must expose an explicit spec-resolution function...
  assert.match(subagentSrc, /function resolveSubagentLaunchSpec\(/);
  // ...that picks the Node binary from SF_NODE_BIN or the current process.
  assert.match(
    subagentSrc,
    /command = process\.env\.SF_NODE_BIN \|\| process\.execPath/,
  );
  // Regression guard: no `bash -lc` shell-wrapping anywhere in the source.
  assert.doesNotMatch(subagentSrc, /bash -lc/);
});
test("normal subagent execution spawns the resolved Node command with argv array", () => {
  // spawn() must receive (command, argv[]) so arguments are never re-parsed
  // by a shell, and shell mode must be explicitly disabled.
  assert.match(
    subagentSrc,
    /spawn\(\s*launchSpec\.command,\s*\[\.\.\.extensionArgs,\s*\.\.\.launchSpec\.args\]/,
  );
  assert.match(subagentSrc, /shell:\s*false/);
});
test("cmux launcher writes only explicit environment patch, not the full process env", () => {
  assert.match(subagentSrc, /function writeNodeSubagentLauncher\(/);
  // The generated launcher must serialize only the explicit env patch...
  assert.match(
    subagentSrc,
    /const env = \{ \.\.\.process\.env, \.\.\.\$\{JSON\.stringify\(launchSpec\.envPatch\)\} \}/,
  );
  // ...never a full captured environment object.
  assert.doesNotMatch(subagentSrc, /JSON\.stringify\(launchSpec\.env\)/);
});

View file

@ -0,0 +1,12 @@
{
"id": "vectordrive",
"name": "VectorDrive",
"version": "1.0.0",
"description": "Native vector database integration via vectordrive (Rust-based, in-process)",
"tier": "bundled",
"requires": { "platform": ">=2.71.0" },
"provides": {
"tools": ["vectordrive_info", "vectordrive_store", "vectordrive_search"],
"hooks": ["session_start", "session_shutdown"]
}
}

View file

@ -0,0 +1,36 @@
/**
* VectorDrive Extension for Singularity Forge
*
* Integrates the native Rust vectordrive vector database for semantic
* memory and code search. Works offline with no external services.
*/
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager } from "./manager.js";
import { registerVectordriveInfoTool } from "./tool-info.js";
import { registerVectordriveStoreTool } from "./tool-store.js";
import { registerVectordriveSearchTool } from "./tool-search.js";
// Extension entry point: registers the vectordrive tools and wires the
// session lifecycle hooks declared in extension.json.
export default function (pi: ExtensionAPI) {
  const registrars = [
    registerVectordriveInfoTool,
    registerVectordriveStoreTool,
    registerVectordriveSearchTool,
  ];
  for (const register of registrars) {
    register(pi);
  }
  // Pre-warm the connection on session start; in UI sessions, surface a
  // warning when the native backend is unavailable.
  pi.on("session_start", async (_event, ctx) => {
    const status = await VectordriveManager.getInstance().getStatus();
    if (ctx.hasUI && status.backend === "none" && status.error) {
      ctx.ui.notify(
        `VectorDrive unavailable: ${status.error}`,
        "warning",
      );
    }
  });
  // Release cached handles when the session ends.
  pi.on("session_shutdown", async () => {
    await VectordriveManager.getInstance().close();
  });
}

View file

@ -0,0 +1,205 @@
/**
 * VectorDrive Manager — singleton wrapping the native vectordrive VectorDb.
*
* Loads the `vectordrive` npm package dynamically (optional dependency),
* creates a persisted VectorDb in `.sf/vectordrive/`, and exposes status
* and search/store operations with graceful degradation.
*/
import { mkdirSync } from "node:fs";
import { dirname } from "node:path";
export type VectorBackend = "vectordrive" | "none";
export interface VectordriveStatus {
backend: VectorBackend;
version: string | null;
implementation: string | null;
initialized: boolean;
vectorCount: number;
error: string | null;
dbPath: string | null;
}
export interface VectorEntry {
id: string;
vector: number[];
metadata?: Record<string, unknown>;
}
export interface SearchResult {
id: string;
score: number;
metadata?: Record<string, unknown>;
}
const DB_DIR = ".sf/vectordrive";
const DB_PATH = `${DB_DIR}/forge.vectors`;
const DIMENSIONS = 384;
// Resolve the on-disk database location. Prefers POSIX HOME, falls back to
// Windows USERPROFILE, then to a CWD-relative path. Uses `||` deliberately so
// an empty HOME also falls through.
// NOTE(review): this anchors the store under $HOME rather than the repo root,
// although the module doc mentions `.sf/vectordrive/` — confirm intent.
function getDbPath(): string {
  const homeDir = process.env.HOME || process.env.USERPROFILE || ".";
  return `${homeDir}/${DB_PATH}`;
}
// Best-effort creation of a file's parent directory. Failures are swallowed:
// they will surface later, with better context, when the DB opens the file.
function ensureDir(path: string): void {
  const parent = dirname(path);
  try {
    mkdirSync(parent, { recursive: true });
  } catch {
    // ignore — see note above
  }
}
/**
 * Simple text→vector fallback when no embedding model is available.
 * Folds UTF-16 code units into the vector round-robin, then L2-normalizes.
 * Deterministic for a given (text, dimensions) pair; the zero vector is
 * returned unscaled for empty/whitespace-only input.
 */
export function textToVector(text: string, dimensions: number = DIMENSIONS): number[] {
  const vec: number[] = new Array(dimensions).fill(0);
  const chars = text.toLowerCase().trim();
  for (let i = 0; i < chars.length; i += 1) {
    vec[i % dimensions] += chars.charCodeAt(i) / 65535;
  }
  const sumSquares = vec.reduce((acc, component) => acc + component * component, 0);
  const magnitude = Math.sqrt(sumSquares);
  return magnitude > 0 ? vec.map((component) => component / magnitude) : vec;
}
/**
 * Process-wide singleton around the optional native `vectordrive` VectorDb.
 *
 * Lifecycle: the first getStatus() call probes for the package and caches the
 * result — success or failure — until close() resets all cached state.
 * Every operation degrades gracefully when the backend is unavailable:
 * store/delete return false, search returns [], getDb() returns null.
 */
export class VectordriveManager {
  private static instance: VectordriveManager;
  // Cached probe result; `initialized: true` marks it final until close().
  private status: VectordriveStatus | null = null;
  // In-flight probe, shared so concurrent getStatus() calls probe only once.
  private initPromise: Promise<VectordriveStatus> | null = null;
  // Native VectorDb handle — untyped because the package is optional.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private db: any | null = null;
  // The imported vectordrive module itself, kept for future API access.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private vd: any | null = null;
  private constructor() {}
  /** Lazily create and return the process-wide singleton. */
  static getInstance(): VectordriveManager {
    if (!VectordriveManager.instance) {
      VectordriveManager.instance = new VectordriveManager();
    }
    return VectordriveManager.instance;
  }
  /** Return the cached status, or run (or join) the probe if none exists. */
  async getStatus(): Promise<VectordriveStatus> {
    if (this.status?.initialized) return this.status;
    if (this.initPromise) return this.initPromise;
    this.initPromise = this.probe();
    return this.initPromise;
  }
  /** The live VectorDb handle, or null when the backend is unavailable. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async getDb(): Promise<any | null> {
    const status = await this.getStatus();
    if (status.backend !== "vectordrive") return null;
    return this.db;
  }
  // Probe for the optional package, open/create the persisted DB, and build
  // the status snapshot. Never throws — every failure path returns a
  // `backend: "none"` status with the error message attached.
  private async probe(): Promise<VectordriveStatus> {
    const dbPath = getDbPath();
    let vectordrive: any | null = null;
    try {
      // Indirect specifier keeps bundlers from statically resolving the
      // optional dependency.
      const modName = "vectordrive";
      vectordrive = await import(modName);
    } catch (err) {
      this.status = {
        backend: "none",
        version: null,
        implementation: null,
        initialized: true,
        vectorCount: 0,
        error: `vectordrive package not installed: ${err instanceof Error ? err.message : String(err)}`,
        dbPath: null,
      };
      return this.status;
    }
    try {
      this.vd = vectordrive;
      ensureDir(dbPath);
      // Tolerate either export casing across package versions.
      const VectorDb = vectordrive.VectorDb || vectordrive.VectorDB;
      if (typeof VectorDb !== "function") {
        throw new Error("vectordrive package does not export VectorDb");
      }
      this.db = new VectorDb({
        dimensions: DIMENSIONS,
        storagePath: dbPath,
        distanceMetric: "cosine",
      });
      const count = (await this.db.len()) as number;
      // NOTE(review): assumes getVersion() returns an object with a
      // `version` field — confirm against the vectordrive API.
      const version = vectordrive.getVersion?.() ?? null;
      const impl = vectordrive.getImplementationType?.() ?? "unknown";
      this.status = {
        backend: "vectordrive",
        version: version?.version ?? null,
        implementation: impl,
        initialized: true,
        vectorCount: count,
        error: null,
        dbPath,
      };
      return this.status;
    } catch (err) {
      this.status = {
        backend: "none",
        version: null,
        implementation: null,
        initialized: true,
        vectorCount: 0,
        error: err instanceof Error ? err.message : String(err),
        dbPath: null,
      };
      return this.status;
    }
  }
  /** Insert one vector entry. Returns false when unavailable or on error. */
  async store(entry: VectorEntry): Promise<boolean> {
    const db = await this.getDb();
    if (!db) return false;
    try {
      await db.insert({
        id: entry.id,
        vector: entry.vector,
        metadata: entry.metadata,
      });
      return true;
    } catch {
      return false;
    }
  }
  /** k-nearest search. Returns [] when unavailable or on error. */
  async search(vector: number[], k: number): Promise<SearchResult[]> {
    const db = await this.getDb();
    if (!db) return [];
    try {
      const results = await db.search({ vector, k });
      return results.map((r: any) => ({
        id: String(r.id),
        score: Number(r.score),
        metadata: r.metadata,
      }));
    } catch {
      return [];
    }
  }
  /** Delete one entry by id. Returns false when unavailable or on error. */
  async delete(id: string): Promise<boolean> {
    const db = await this.getDb();
    if (!db) return false;
    try {
      return await db.delete(id);
    } catch {
      return false;
    }
  }
  // Reset all cached state so the next getStatus() re-probes.
  // NOTE(review): drops references without calling any close/flush on the
  // underlying VectorDb — assumes the native layer needs no explicit
  // shutdown; confirm against vectordrive's API.
  async close(): Promise<void> {
    this.db = null;
    this.vd = null;
    this.status = null;
    this.initPromise = null;
  }
}

View file

@ -0,0 +1,36 @@
import { describe, expect, it } from "vitest";
import { VectordriveManager, textToVector } from "../manager.js";
describe("VectordriveManager", () => {
  it("should return singleton instance", () => {
    const a = VectordriveManager.getInstance();
    const b = VectordriveManager.getInstance();
    expect(a).toBe(b);
  });
  it("should degrade gracefully when vectordrive is not installed", async () => {
    const manager = VectordriveManager.getInstance();
    // close() clears any cached status so getStatus() re-probes here.
    await manager.close();
    // NOTE(review): assumes the optional `vectordrive` package is absent in
    // the test environment — if CI ever installs it, backend becomes
    // "vectordrive" and this test fails; confirm the environment guarantee.
    const status = await manager.getStatus();
    expect(status.initialized).toBe(true);
    expect(status.backend).toBe("none");
    expect(status.error).toBeTruthy();
  });
});
describe("textToVector", () => {
  it("should produce normalized vectors", () => {
    const v = textToVector("hello world", 384);
    expect(v).toHaveLength(384);
    // L2 norm of a non-empty input must be ~1 after normalization.
    const mag = Math.sqrt(v.reduce((s, x) => s + x * x, 0));
    expect(mag).toBeCloseTo(1, 5);
  });
  it("should produce different vectors for different texts", () => {
    const a = textToVector("authentication middleware for express", 384);
    const b = textToVector("database migration helper in python", 384);
    // Cosine similarity (both unit vectors) should not be near-identical.
    const similarity = a.reduce((s, x, i) => s + x * b[i], 0);
    expect(similarity).toBeLessThan(0.95);
  });
});

View file

@ -0,0 +1,68 @@
/**
* VectorDrive Info Tool
*
* Introspects the vectordrive native package status, version, implementation
* type (native vs wasm), and vector count.
*/
import { Type } from "@sinclair/typebox";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager } from "./manager.js";
/** Result envelope returned by extension tool execute() handlers. */
export interface ToolExecutionResult {
  // Text parts sent back to the LLM context.
  content: Array<{ type: "text"; text: string }>;
  // Structured payload used for rendering and stateful follow-ups.
  details: Record<string, unknown>;
  // Marks the call as failed; presumably surfaced to the model/UI — TODO confirm.
  isError?: boolean;
}
/**
 * Register the `vectordrive_info` tool on the extension API.
 *
 * The tool reports the cached VectorDrive status (backend, implementation,
 * version, vector count, db path); `refresh: true` drops the cache first so
 * the status is re-probed.
 */
export function registerVectordriveInfoTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "vectordrive_info",
    label: "VectorDrive Info",
    description:
      "Check VectorDrive native vector database status. " +
      "Returns implementation type (native Rust or WASM), version, " +
      "vector count, and database path.",
    promptSnippet: "Check VectorDrive database status and capabilities",
    parameters: Type.Object({
      refresh: Type.Optional(
        Type.Boolean({
          default: false,
          description: "Force re-probe instead of using cached status",
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      const manager = VectordriveManager.getInstance();
      if (params.refresh) {
        // close() clears cached state so getStatus() probes again.
        await manager.close();
      }
      const status = await manager.getStatus();
      const report = [
        `# VectorDrive Status`,
        "",
        `- **Backend**: ${status.backend}`,
        `- **Implementation**: ${status.implementation ?? "n/a"}`,
        `- **Version**: ${status.version ?? "n/a"}`,
        `- **Vectors**: ${status.vectorCount}`,
        `- **Initialized**: ${status.initialized}`,
      ];
      if (status.dbPath) {
        report.push(`- **DB Path**: ${status.dbPath}`);
      }
      if (status.error) {
        report.push(`- **Error**: ${status.error}`);
      }
      return {
        content: [{ type: "text", text: report.join("\n") }],
        details: { status },
        // A missing backend is reported as a tool error.
        isError: status.backend === "none",
      };
    },
  });
}

View file

@ -0,0 +1,120 @@
/**
* VectorDrive Search Tool
*
* Semantic search over stored vectors. Accepts a pre-computed query vector
* or raw text (auto-embedded). Falls back to metadata keyword matching
* when vectordrive is offline.
*/
import { Type } from "@sinclair/typebox";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager, textToVector } from "./manager.js";
/** Result envelope returned by extension tool execute() handlers. */
export interface ToolExecutionResult {
  // Text parts sent back to the LLM context.
  content: Array<{ type: "text"; text: string }>;
  // Structured payload used for rendering and stateful follow-ups.
  details: Record<string, unknown>;
  // Marks the call as failed; presumably surfaced to the model/UI — TODO confirm.
  isError?: boolean;
}
/**
 * Register the `vectordrive_search` tool on the extension API.
 *
 * Searches VectorDrive by a pre-computed vector (if given) or by embedding
 * the text query with textToVector. Results are rendered as markdown with
 * score, optional text preview, and remaining metadata fields.
 *
 * Fix: metadata values were interpolated with `${v}`, which renders object
 * values (allowed by vectordrive_store's Type.Unknown metadata) as
 * "[object Object]". Non-primitive values are now JSON-stringified.
 */
export function registerVectordriveSearchTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "vectordrive_search",
    label: "VectorDrive Search",
    description:
      "Search VectorDrive by vector similarity or text query. " +
      "Returns the most relevant stored entries with similarity scores. " +
      "When no embedding model is available, a simple hash embedding is used — " +
      "for best results provide pre-computed vectors via vectordrive_store.",
    promptSnippet: "Search VectorDrive memories or code chunks",
    promptGuidelines: [
      "Use vectordrive_search to find previously stored memories, code chunks, or documents.",
      "Be specific with queries for better results.",
      "If you stored code with metadata.file_path, results will include the source location.",
    ],
    parameters: Type.Object({
      query: Type.String({
        description: "Text query to search for (auto-converted to embedding)",
      }),
      vector: Type.Optional(
        Type.Array(Type.Number(), {
          description: "Optional pre-computed query vector. If provided, overrides 'query' text.",
        }),
      ),
      limit: Type.Optional(
        Type.Number({
          default: 10,
          description: "Maximum results (1-50)",
          minimum: 1,
          maximum: 50,
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      const manager = VectordriveManager.getInstance();
      const status = await manager.getStatus();
      // Clamp defensively even though the schema declares minimum/maximum.
      const limit = Math.min(Math.max(params.limit ?? 10, 1), 50);
      if (status.backend === "none") {
        return {
          content: [
            {
              type: "text",
              text: `VectorDrive is unavailable: ${status.error ?? "unknown error"}\n\nInstall with: npm install vectordrive`,
            },
          ],
          details: { status },
          isError: true,
        };
      }
      // An explicit non-empty vector wins; otherwise embed the text query.
      const queryVector =
        params.vector && params.vector.length > 0
          ? params.vector
          : textToVector(params.query);
      const results = await manager.search(queryVector, limit);
      if (results.length === 0) {
        return {
          content: [
            {
              type: "text",
              text: `No results found in VectorDrive for query: "${params.query}"`,
            },
          ],
          details: { query: params.query, count: 0 },
          isError: false,
        };
      }
      const lines: string[] = [];
      lines.push(`# VectorDrive Search Results`);
      lines.push(`Query: "${params.query}"`);
      lines.push("");
      // Render metadata values safely: objects/arrays would otherwise print
      // as "[object Object]" via template interpolation.
      const formatMetaValue = (v: unknown): string =>
        typeof v === "object" && v !== null ? JSON.stringify(v) : String(v);
      for (const r of results) {
        const meta = r.metadata ?? {};
        const preview = meta.text_preview ?? "";
        lines.push(`## ${r.id} (score: ${r.score.toFixed(4)})`);
        if (preview) {
          lines.push("```");
          lines.push(String(preview).slice(0, 400));
          lines.push("```");
        }
        const metaLines = Object.entries(meta)
          .filter(([k]) => k !== "text_preview" && k !== "stored_at")
          .map(([k, v]) => `- ${k}: ${formatMetaValue(v)}`);
        if (metaLines.length > 0) {
          lines.push(...metaLines);
        }
        lines.push("");
      }
      return {
        content: [{ type: "text", text: lines.join("\n") }],
        details: { results, count: results.length },
        isError: false,
      };
    },
  });
}

View file

@ -0,0 +1,116 @@
/**
* VectorDrive Store Tool
*
* Store a vector with metadata in the native VectorDb.
*/
import { Type } from "@sinclair/typebox";
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { VectordriveManager, textToVector } from "./manager.js";
/** Result envelope returned by extension tool execute() handlers. */
export interface ToolExecutionResult {
  // Text parts sent back to the LLM context.
  content: Array<{ type: "text"; text: string }>;
  // Structured payload used for rendering and stateful follow-ups.
  details: Record<string, unknown>;
  // Marks the call as failed; presumably surfaced to the model/UI — TODO confirm.
  isError?: boolean;
}
/**
 * Register the `vectordrive_store` tool on the extension API.
 *
 * Stores one entry under a caller-supplied id. The embedding is either the
 * provided vector or, failing that, a hash embedding of the raw text. A
 * stored_at timestamp and (when text is given) a 200-char text_preview are
 * merged into the metadata before insert.
 */
export function registerVectordriveStoreTool(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "vectordrive_store",
    label: "VectorDrive Store",
    description:
      "Store a vector entry in VectorDrive. Accepts either a pre-computed " +
      "vector array or raw text (a simple hash embedding is generated automatically). " +
      "Metadata is stored as JSON and returned in search results.",
    promptSnippet: "Store a memory or code chunk in VectorDrive",
    parameters: Type.Object({
      id: Type.String({
        description: "Unique identifier for this entry (e.g. file-path:line-range)",
      }),
      text: Type.Optional(
        Type.String({
          description: "Raw text content to store. A simple embedding is auto-generated if 'vector' is not provided.",
        }),
      ),
      vector: Type.Optional(
        Type.Array(Type.Number(), {
          description: "Pre-computed embedding vector (384 dimensions). Overrides 'text' if provided.",
        }),
      ),
      metadata: Type.Optional(
        Type.Record(Type.String(), Type.Unknown(), {
          description: "Optional metadata object (e.g. { file_path, line_start, language })",
        }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      // Every error path returns the same envelope shape.
      const failure = (text: string, details: Record<string, unknown>) => ({
        content: [{ type: "text" as const, text }],
        details,
        isError: true,
      });
      const manager = VectordriveManager.getInstance();
      const status = await manager.getStatus();
      if (status.backend === "none") {
        return failure(
          `VectorDrive is unavailable: ${status.error ?? "unknown error"}\n\nInstall with: npm install vectordrive`,
          { status },
        );
      }
      const id = params.id.trim();
      if (!id) {
        return failure("Error: id is required.", { error: "missing_id" });
      }
      // Prefer an explicit non-empty vector; otherwise derive one from text.
      let vector: number[];
      if (params.vector && params.vector.length > 0) {
        vector = params.vector;
      } else if (params.text) {
        vector = textToVector(params.text);
      } else {
        return failure(
          "Error: either 'text' or 'vector' must be provided.",
          { error: "missing_content" },
        );
      }
      const metadata: Record<string, unknown> = {
        ...(params.metadata ?? {}),
        stored_at: new Date().toISOString(),
      };
      if (params.text) {
        metadata.text_preview = params.text.slice(0, 200);
      }
      const stored = await manager.store({ id, vector, metadata });
      if (!stored) {
        return failure("Error: failed to store entry.", { error: "store_failed" });
      }
      return {
        content: [
          {
            type: "text",
            text: `Stored ${id} (${vector.length} dims).`,
          },
        ],
        details: { id, dimensions: vector.length, metadata },
        isError: false,
      };
    },
  });
}

View file

@ -0,0 +1,93 @@
---
name: create-sf-extension
description: Create, debug, and iterate on SF extensions (TypeScript modules that add tools, commands, event hooks, custom UI, and providers to SF). Use when asked to build an extension, add a tool the LLM can call, register a slash command, hook into SF events, create custom TUI components, or modify SF behavior. Triggers on "create extension", "build extension", "add a tool", "register command", "hook into sf", "custom tool", "sf plugin", "sf extension".
---
<essential_principles>
**Extensions are TypeScript modules** that hook into SF's runtime (built on pi). They export a default function receiving `ExtensionAPI` and use it to subscribe to events, register tools/commands/shortcuts, and interact with the session.
**SF extension paths (community/user-installed extensions):**
- Global: `~/.sf/extensions/*.ts` or `~/.sf/extensions/*/index.ts`
- Project-local: `.sf/extensions/*.ts` or `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package. Community extensions placed there are silently ignored by the loader — do not list it as an install location.
**The three primitives:**
1. **Events** — Listen and react (`pi.on("event", handler)`). Can block tool calls, modify messages, inject context.
2. **Tools** — Give the LLM new abilities (`pi.registerTool()`). LLM calls them autonomously.
3. **Commands** — Give users slash commands (`pi.registerCommand()`). Users type `/mycommand`.
**Non-negotiable rules:**
- Use `StringEnum` from `@singularity-forge/pi-ai` for string enum params (NOT `Type.Union`/`Type.Literal` — breaks Google's API)
- Truncate tool output to 50KB / 2000 lines max (use `truncateHead`/`truncateTail` from `@singularity-forge/pi-coding-agent`)
- Store stateful tool state in `details` for branching support
- Check `signal?.aborted` in long-running tool executions
- Use `pi.exec()` not `child_process` for shell commands
- Check `ctx.hasUI` before dialog methods (non-interactive modes exist)
- Session control methods (`waitForIdle`, `newSession`, `fork`, `navigateTree`, `reload`) are ONLY available in command handlers — they deadlock in event handlers
- Lines from `render()` must not exceed `width` — use `truncateToWidth()`
- Use theme from callback params, never import directly
- Strip leading `@` from path params in custom tools (some models add it)
**Available imports:**
| Package | Purpose |
|---------|---------|
| `@singularity-forge/pi-coding-agent` | `ExtensionAPI`, `ExtensionContext`, `Theme`, event types, tool utilities, `DynamicBorder`, `BorderedLoader`, `CustomEditor`, `highlightCode` |
| `@sinclair/typebox` | `Type.Object`, `Type.String`, `Type.Number`, `Type.Optional`, `Type.Boolean`, `Type.Array` |
| `@singularity-forge/pi-ai` | `StringEnum` (required for string enums), `Type` re-export |
| `@singularity-forge/pi-tui` | `Text`, `Box`, `Container`, `Spacer`, `Markdown`, `SelectList`, `Input`, `matchesKey`, `Key`, `truncateToWidth`, `visibleWidth` |
| Node.js built-ins | `node:fs`, `node:path`, `node:child_process`, etc. |
</essential_principles>
<routing>
Based on user intent, route to the appropriate workflow:
**Building a new extension:**
- "Create an extension", "build a tool", "I want to add a command" → `workflows/create-extension.md`
**Adding capabilities to an existing extension:**
- "Add a tool to my extension", "add event hook", "add custom rendering" → `workflows/add-capability.md`
**Debugging an extension:**
- "My extension doesn't work", "tool not showing up", "event not firing" → `workflows/debug-extension.md`
**If user intent is clear from context, skip the question and go directly to the workflow.**
</routing>
<reference_index>
All domain knowledge in `references/`:
**Core architecture:** extension-lifecycle.md, events-reference.md
**API surface:** extensionapi-reference.md, extensioncontext-reference.md
**Capabilities:** custom-tools.md, custom-commands.md, custom-ui.md, custom-rendering.md
**Patterns:** state-management.md, system-prompt-modification.md, compaction-session-control.md
**Infrastructure:** model-provider-management.md, remote-execution-overrides.md, packaging-distribution.md, mode-behavior.md
**Spec:** `docs/extension-sdk/manifest-spec.md` — manifest format, tiers, validation
**Testing:** `docs/extension-sdk/testing.md` — mock patterns, test conventions
**SDK:** `docs/extension-sdk/` — the authoritative SF extension guide
**Gotchas:** key-rules-gotchas.md
</reference_index>
<workflows_index>
| Workflow | Purpose |
|----------|---------|
| create-extension.md | Build a new extension from scratch |
| add-capability.md | Add tools, commands, hooks, UI to an existing extension |
| debug-extension.md | Diagnose and fix extension issues |
</workflows_index>
<success_criteria>
Extension is complete when:
- `extension-manifest.json` exists with accurate `provides` listing all registered tools/commands/hooks/shortcuts
- TypeScript compiles without errors (jiti handles this at runtime)
- Extension loads on SF startup or `/reload` without errors
- Tools appear in the LLM's system prompt and are callable
- Commands respond to `/command` input
- Event hooks fire at the expected lifecycle points
- Custom UI renders correctly within terminal width
- State persists correctly across session restarts (if stateful)
- Output is truncated to safe limits (if tools produce variable output)
</success_criteria>

View file

@ -0,0 +1,77 @@
<overview>
Custom compaction hooks, triggering compaction, and session control methods available only in command handlers.
</overview>
<custom_compaction>
Override default compaction behavior:
```typescript
pi.on("session_before_compact", async (event, ctx) => {
const { preparation, branchEntries, customInstructions, signal } = event;
// Option 1: Cancel
return { cancel: true };
// Option 2: Custom summary
return {
compaction: {
summary: "Custom summary of conversation so far...",
firstKeptEntryId: preparation.firstKeptEntryId,
tokensBefore: preparation.tokensBefore,
}
};
});
```
</custom_compaction>
<trigger_compaction>
Trigger compaction programmatically from any handler:
```typescript
ctx.compact({
customInstructions: "Focus on the authentication changes",
onComplete: (result) => ctx.ui.notify("Compacted!", "info"),
onError: (error) => ctx.ui.notify(`Failed: ${error.message}`, "error"),
});
```
</trigger_compaction>
<session_control>
**Only available in command handlers** (deadlocks in event handlers):
```typescript
pi.registerCommand("handoff", {
handler: async (args, ctx) => {
await ctx.waitForIdle();
// Create new session with initial context
const result = await ctx.newSession({
parentSession: ctx.sessionManager.getSessionFile(),
setup: async (sm) => {
sm.appendMessage({
role: "user",
content: [{ type: "text", text: `Context: ${args}` }],
timestamp: Date.now(),
});
},
});
if (result.cancelled) { /* extension cancelled via session_before_switch */ }
},
});
```
| Method | Purpose |
|--------|---------|
| `ctx.waitForIdle()` | Wait for agent to finish streaming |
| `ctx.newSession(options?)` | Create a new session |
| `ctx.fork(entryId)` | Fork from a specific entry |
| `ctx.navigateTree(targetId, options?)` | Navigate session tree (with optional summary) |
| `ctx.reload()` | Hot-reload everything (treat as terminal — code after runs pre-reload version) |
`navigateTree` options:
- `summarize: boolean` — generate summary of abandoned branch
- `customInstructions: string` — instructions for summarizer
- `replaceInstructions: boolean` — replace default prompt entirely
- `label: string` — label to attach to branch summary
</session_control>

View file

@ -0,0 +1,139 @@
<overview>
Custom slash commands — registration, argument completions, subcommand patterns, and the extended command context.
</overview>
<basic_registration>
```typescript
pi.registerCommand("deploy", {
description: "Deploy to an environment",
handler: async (args, ctx) => {
// args = everything after "/deploy "
// ctx = ExtensionCommandContext (has session control methods)
ctx.ui.notify(`Deploying to ${args || "production"}`, "info");
},
});
```
</basic_registration>
<argument_completions>
Add tab-completion for command arguments:
```typescript
import type { AutocompleteItem } from "@singularity-forge/pi-tui";
pi.registerCommand("deploy", {
description: "Deploy to an environment",
getArgumentCompletions: (prefix: string): AutocompleteItem[] | null => {
const envs = ["dev", "staging", "prod"];
const items = envs.map(e => ({ value: e, label: e }));
const filtered = items.filter(i => i.value.startsWith(prefix));
return filtered.length > 0 ? filtered : null;
},
handler: async (args, ctx) => {
ctx.ui.notify(`Deploying to ${args}`, "info");
},
});
```
</argument_completions>
<subcommand_pattern>
Fake nested commands via first-argument parsing. Used by `/wt new|ls|switch|merge|rm`.
```typescript
pi.registerCommand("foo", {
description: "Manage foo items: /foo new|list|delete [name]",
getArgumentCompletions: (prefix: string) => {
const parts = prefix.trim().split(/\s+/);
// First arg: subcommand
if (parts.length <= 1) {
return ["new", "list", "delete"]
.filter(cmd => cmd.startsWith(parts[0] ?? ""))
.map(cmd => ({ value: cmd, label: cmd }));
}
// Second arg: depends on subcommand
if (parts[0] === "delete") {
const items = getItemsSomehow();
return items
.filter(name => name.startsWith(parts[1] ?? ""))
.map(name => ({ value: `delete ${name}`, label: name }));
}
return [];
},
handler: async (args, ctx) => {
const parts = args.trim().split(/\s+/);
const sub = parts[0];
switch (sub) {
case "new": /* ... */ return;
case "list": /* ... */ return;
case "delete": /* handle parts[1] */ return;
default:
ctx.ui.notify("Usage: /foo <new|list|delete> [name]", "info");
}
},
});
```
**Gotcha:** `"".trim().split(/\s+/)` produces `['']`, not `[]`. That's why `parts.length <= 1` handles both empty and partial first arg.
</subcommand_pattern>
<command_context>
Command handlers get `ExtensionCommandContext` which extends `ExtensionContext` with session control methods:
| Method | Purpose |
|--------|---------|
| `ctx.waitForIdle()` | Wait for agent to finish streaming |
| `ctx.newSession(options?)` | Create a new session |
| `ctx.fork(entryId)` | Fork from an entry |
| `ctx.navigateTree(targetId, options?)` | Navigate session tree |
| `ctx.reload()` | Hot-reload everything |
**⚠️ These methods are ONLY available in command handlers.** Calling them from event handlers causes deadlocks.
```typescript
pi.registerCommand("handoff", {
handler: async (args, ctx) => {
await ctx.waitForIdle();
await ctx.newSession({
setup: async (sm) => {
sm.appendMessage({
role: "user",
content: [{ type: "text", text: `Context: ${args}` }],
timestamp: Date.now(),
});
},
});
},
});
```
</command_context>
<reload_pattern>
Expose reload as both a command and a tool the LLM can call:
```typescript
pi.registerCommand("reload-runtime", {
description: "Reload extensions, skills, prompts, and themes",
handler: async (_args, ctx) => {
await ctx.reload();
return; // Treat reload as terminal
},
});
pi.registerTool({
name: "reload_runtime",
label: "Reload Runtime",
description: "Reload extensions, skills, prompts, and themes",
parameters: Type.Object({}),
async execute() {
pi.sendUserMessage("/reload-runtime", { deliverAs: "followUp" });
return { content: [{ type: "text", text: "Queued /reload-runtime as follow-up." }] };
},
});
```
</reload_pattern>

View file

@ -0,0 +1,108 @@
<overview>
Custom rendering for tools and messages — control how they appear in the TUI.
</overview>
<tool_rendering>
Tools can provide `renderCall` (how the call looks) and `renderResult` (how the result looks):
```typescript
import { Text } from "@singularity-forge/pi-tui";
import { keyHint } from "@singularity-forge/pi-coding-agent";
pi.registerTool({
name: "my_tool",
// ...
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("my_tool "));
text += theme.fg("muted", args.action);
if (args.text) text += " " + theme.fg("dim", `"${args.text}"`);
return new Text(text, 0, 0); // 0,0 padding — Box handles it
},
renderResult(result, { expanded, isPartial }, theme) {
// isPartial = true during streaming (onUpdate was called)
if (isPartial) {
return new Text(theme.fg("warning", "Processing..."), 0, 0);
}
// expanded = user toggled expand (Ctrl+O)
if (result.details?.error) {
return new Text(theme.fg("error", `Error: ${result.details.error}`), 0, 0);
}
let text = theme.fg("success", "✓ Done");
if (!expanded) {
text += ` (${keyHint("expandTools", "to expand")})`;
}
if (expanded && result.details?.items) {
for (const item of result.details.items) {
text += "\n " + theme.fg("dim", item);
}
}
return new Text(text, 0, 0);
},
});
```
If you omit `renderCall`/`renderResult`, the built-in renderer is used. Useful for tool overrides where you just wrap logic without reimplementing UI.
**Fallback:** If render methods throw, `renderCall` shows tool name, `renderResult` shows raw `content` text.
</tool_rendering>
<key_hints>
Key hint helpers for showing keybinding info in render output:
```typescript
import { keyHint, appKeyHint, editorKey, rawKeyHint } from "@singularity-forge/pi-coding-agent";
// Editor action hint (respects user keybinding config)
keyHint("expandTools", "to expand") // e.g., "Ctrl+O to expand"
keyHint("selectConfirm", "to select")
// Raw key hint (always shows literal key)
rawKeyHint("Ctrl+O", "to expand")
```
</key_hints>
<message_rendering>
Register a renderer for custom message types:
```typescript
import { Text } from "@singularity-forge/pi-tui";
pi.registerMessageRenderer("my-extension", (message, options, theme) => {
const { expanded } = options;
let text = theme.fg("accent", `[${message.customType}] `) + message.content;
if (expanded && message.details) {
text += "\n" + theme.fg("dim", JSON.stringify(message.details, null, 2));
}
return new Text(text, 0, 0);
});
// Send messages that use this renderer:
pi.sendMessage({
customType: "my-extension", // Matches renderer name
content: "Status update",
display: true,
details: { foo: "bar" },
});
```
</message_rendering>
<syntax_highlighting>
```typescript
import { highlightCode, getLanguageFromPath } from "@singularity-forge/pi-coding-agent";
const lang = getLanguageFromPath("/path/to/file.rs"); // "rust"
const highlighted = highlightCode(code, lang, theme);
```
</syntax_highlighting>
<best_practices>
- Return `Text` with padding `(0, 0)` — the wrapping `Box` handles padding
- Support `expanded` for detail on demand
- Handle `isPartial` for streaming progress
- Keep collapsed view compact
- Use `\n` for multi-line content within a single `Text`
</best_practices>

View file

@ -0,0 +1,183 @@
<overview>
Complete custom tools reference — registration, parameters, execution, output truncation, overrides, rendering, and dynamic registration.
</overview>
<registration>
```typescript
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
pi.registerTool({
name: "my_tool", // Unique identifier (snake_case)
label: "My Tool", // Display name in TUI
description: "What this does", // Full description shown to LLM
// Optional: one-liner for system prompt "Available tools" section
promptSnippet: "Manage project todo items",
// Optional: bullets added to system prompt "Guidelines" when tool is active
promptGuidelines: [
"Use my_tool for task management instead of file edits."
],
// Parameter schema (MUST use TypeBox)
parameters: Type.Object({
action: StringEnum(["list", "add", "remove"] as const),
text: Type.Optional(Type.String({ description: "Item text" })),
id: Type.Optional(Type.Number({ description: "Item ID" })),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
// 1. Check cancellation
if (signal?.aborted) {
return { content: [{ type: "text", text: "Cancelled" }] };
}
// 2. Stream progress (optional)
onUpdate?.({
content: [{ type: "text", text: "Working..." }],
details: { progress: 50 },
});
// 3. Do the work
const result = await doWork(params);
// 4. Return result
return {
content: [{ type: "text", text: "Result text for LLM" }], // Sent to LLM context
details: { data: result }, // For rendering & state
};
},
// Optional: custom TUI rendering
renderCall(args, theme) { ... },
renderResult(result, { expanded, isPartial }, theme) { ... },
});
```
</registration>
<critical_stringenum>
**⚠️ MUST use `StringEnum` for string enum parameters:**
```typescript
import { StringEnum } from "@singularity-forge/pi-ai";
// ✅ Correct — works with all providers including Google
action: StringEnum(["list", "add", "remove"] as const)
// ❌ BROKEN with Google's API
action: Type.Union([Type.Literal("list"), Type.Literal("add")])
```
</critical_stringenum>
<output_truncation>
Tools MUST truncate output to avoid context overflow. Built-in limit: 50KB / 2000 lines.
```typescript
import {
truncateHead, truncateTail, formatSize,
DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES,
} from "@singularity-forge/pi-coding-agent";
async execute(toolCallId, params, signal, onUpdate, ctx) {
const output = await runCommand();
const truncation = truncateHead(output, {
maxLines: DEFAULT_MAX_LINES,
maxBytes: DEFAULT_MAX_BYTES,
});
let result = truncation.content;
if (truncation.truncated) {
const tempFile = writeTempFile(output);
result += `\n\n[Output truncated: ${truncation.outputLines}/${truncation.totalLines} lines`;
result += ` (${formatSize(truncation.outputBytes)}/${formatSize(truncation.totalBytes)}).`;
result += ` Full output: ${tempFile}]`;
}
return { content: [{ type: "text", text: result }] };
}
```
Use `truncateHead` when beginning matters (search results, file reads). Use `truncateTail` when end matters (logs, command output).
</output_truncation>
<signaling_errors>
Throw to signal an error (sets `isError: true`). Returning a value never sets error flag.
```typescript
async execute(toolCallId, params) {
if (!isValid(params.input)) {
throw new Error(`Invalid input: ${params.input}`);
}
return { content: [{ type: "text", text: "OK" }], details: {} };
}
```
</signaling_errors>
<dynamic_registration>
Tools can be registered at any time — during load, in `session_start`, in command handlers. Available immediately without `/reload`.
```typescript
pi.on("session_start", async (_event, ctx) => {
pi.registerTool({ name: "dynamic_tool", ... });
});
```
Use `pi.setActiveTools(names)` to enable/disable tools at runtime.
</dynamic_registration>
<overriding_builtins>
Register a tool with the same name as a built-in (`read`, `bash`, `edit`, `write`, `grep`, `find`, `ls`) to override it. **Must match exact result shape including `details` type.**
```typescript
import { createReadTool } from "@singularity-forge/pi-coding-agent";
pi.registerTool({
name: "read",
label: "Read (Logged)",
description: "Read file contents with logging",
parameters: Type.Object({
path: Type.String(),
offset: Type.Optional(Type.Number()),
limit: Type.Optional(Type.Number()),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
console.log(`[AUDIT] Reading: ${params.path}`);
const builtIn = createReadTool(ctx.cwd);
return builtIn.execute(toolCallId, params, signal, onUpdate);
},
// Omit renderCall/renderResult to use built-in renderer
});
```
Start with no built-in tools: `sf --no-tools -e ./my-extension.ts`
</overriding_builtins>
<multiple_tools>
One extension can register multiple tools with shared state:
```typescript
export default function (pi: ExtensionAPI) {
let connection = null;
pi.registerTool({ name: "db_connect", ... });
pi.registerTool({ name: "db_query", ... });
pi.registerTool({ name: "db_close", ... });
pi.on("session_shutdown", async () => {
connection?.close();
});
}
```
</multiple_tools>
<path_normalization>
Some models add `@` prefix to path arguments. Strip it:
```typescript
async execute(toolCallId, params, signal, onUpdate, ctx) {
let path = params.path;
if (path.startsWith("@")) path = path.slice(1);
// ...
}
```
</path_normalization>

View file

@ -0,0 +1,490 @@
<overview>
Complete custom UI reference — dialogs, persistent elements, custom components, overlays, custom editors, built-in components, keyboard input, performance, theming, and common mistakes.
</overview>
<ui_architecture>
```
┌─────────────────────────────────────────────────┐
│ Custom Header (ctx.ui.setHeader) │
├─────────────────────────────────────────────────┤
│ Message Area │
│ - User/assistant messages │
│ - Tool calls ◄── renderCall/renderResult │
│ - Custom messages ◄── registerMessageRenderer │
├─────────────────────────────────────────────────┤
│ Widgets (above editor) ◄── ctx.ui.setWidget │
├─────────────────────────────────────────────────┤
│ Editor ◄── ctx.ui.custom() / setEditorComponent│
├─────────────────────────────────────────────────┤
│ Widgets (below editor) ◄── ctx.ui.setWidget │
├─────────────────────────────────────────────────┤
│ Footer ◄── ctx.ui.setFooter / setStatus │
└─────────────────────────────────────────────────┘
┌─────────────────────┐
│ Overlay (floating) │ ◄── ctx.ui.custom({ overlay })
└─────────────────────┘
```
**11 ways to get UI on screen:**
| Method | Blocks? | Replaces editor? |
|--------|---------|-------------------|
| `ctx.ui.select/confirm/input/editor` | Yes | Temporarily |
| `ctx.ui.notify` | No | No |
| `ctx.ui.setStatus` | No | No (footer) |
| `ctx.ui.setWidget` | No | No |
| `ctx.ui.setFooter` | No | No (replaces footer) |
| `ctx.ui.setHeader` | No | No (replaces header) |
| `ctx.ui.custom()` | Yes | Temporarily |
| `ctx.ui.custom({overlay})` | Yes | No (renders on top) |
| `ctx.ui.setEditorComponent` | No | Yes (permanently) |
| `renderCall/renderResult` | No | No (inline in messages) |
| `registerMessageRenderer` | No | No (inline in messages) |
</ui_architecture>
<component_interface>
Every visual element implements:
```typescript
interface Component {
render(width: number): string[]; // Required — each line ≤ width visible chars
handleInput?(data: string): void; // Optional — receive keyboard input
wantsKeyRelease?: boolean; // Optional — receive key release events (Kitty protocol)
invalidate(): void; // Required — clear cached render state
}
```
**Render contract:**
- Return array of strings, one per line
- Each string MUST NOT exceed `width` in visible characters
- ANSI escape codes don't count toward visible width
- **Styles are reset at end of each line** — reapply per line
- Return `[]` for zero-height component
**Invalidation contract:**
- Clear ALL cached render output
- Clear any pre-baked themed strings
- Call `super.invalidate()` if extending a built-in component
</component_interface>
<dialogs>
Blocking dialog methods on `ctx.ui`:
```typescript
const choice = await ctx.ui.select("Pick one:", ["A", "B", "C"]); // string | undefined
const ok = await ctx.ui.confirm("Delete?", "This cannot be undone"); // boolean
const name = await ctx.ui.input("Name:", "placeholder"); // string | undefined
const text = await ctx.ui.editor("Edit:", "prefilled text"); // string | undefined
// Timed auto-dismiss with countdown
const ok = await ctx.ui.confirm("Proceed?", "Auto-continues in 5s", { timeout: 5000 });
// Returns false on timeout, undefined for select/input
// Manual dismissal with AbortSignal (distinguish timeout from cancel)
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 5000);
const ok = await ctx.ui.confirm("Timed", "Auto-cancels in 5s", { signal: controller.signal });
clearTimeout(timeoutId);
if (controller.signal.aborted) { /* timed out */ }
```
</dialogs>
<persistent_ui>
```typescript
// Footer status (multiple extensions can set independent entries)
ctx.ui.setStatus("my-ext", "● Active");
ctx.ui.setStatus("my-ext", undefined); // Clear
// Widgets
ctx.ui.setWidget("my-id", ["Line 1", "Line 2"]); // Above editor
ctx.ui.setWidget("my-id", ["Below"], { placement: "belowEditor" }); // Below editor
ctx.ui.setWidget("my-id", (_tui, theme) => ({ // Themed
render: () => [theme.fg("accent", "Styled")],
invalidate: () => {},
}));
ctx.ui.setWidget("my-id", undefined); // Clear
// Working message during streaming
ctx.ui.setWorkingMessage("Analyzing code...");
ctx.ui.setWorkingMessage(); // Restore default
// Custom footer (full replacement)
ctx.ui.setFooter((tui, theme, footerData) => ({
render(width) {
const branch = footerData.getGitBranch(); // Only available here
const statuses = footerData.getExtensionStatuses(); // All setStatus values
return [truncateToWidth(`${branch} | model`, width)];
},
invalidate() {},
dispose: footerData.onBranchChange(() => tui.requestRender()), // Reactive
}));
ctx.ui.setFooter(undefined); // Restore default
// Custom header
ctx.ui.setHeader((tui, theme) => ({
render(width) { return [theme.fg("accent", theme.bold("My Header"))]; },
invalidate() {},
}));
// Editor control
ctx.ui.setEditorText("Prefill");
const current = ctx.ui.getEditorText();
ctx.ui.pasteToEditor("pasted content"); // Triggers paste handling
// Tool expansion
ctx.ui.setToolsExpanded(true);
const expanded = ctx.ui.getToolsExpanded();
// Theme management
const themes = ctx.ui.getAllThemes();
ctx.ui.setTheme("light");
ctx.ui.theme.fg("accent", "text"); // Access current theme
```
</persistent_ui>
<custom_components>
`ctx.ui.custom()` temporarily replaces the editor. Returns a value when `done()` is called.
**Factory callback args:**
| Argument | Type | Purpose |
|----------|------|---------|
| `tui` | `TUI` | `tui.requestRender()` triggers re-render after state changes |
| `theme` | `Theme` | Current theme for styling |
| `keybindings` | `KeybindingsManager` | App keybinding config |
| `done` | `(value: T) => void` | Close component and return value |
**Inline pattern:**
```typescript
const result = await ctx.ui.custom<string | null>((tui, theme, keybindings, done) => ({
render(width: number): string[] {
return [truncateToWidth("Press Enter to confirm, Escape to cancel", width)];
},
handleInput(data: string) {
if (matchesKey(data, Key.enter)) done("confirmed");
if (matchesKey(data, Key.escape)) done(null);
},
invalidate() {},
}));
```
**Class-based pattern (recommended for complex UI):**
```typescript
class MyComponent {
private selected = 0;
private cachedWidth?: number;
private cachedLines?: string[];
constructor(
private tui: { requestRender: () => void },
private theme: Theme,
private items: string[],
private done: (value: string | null) => void,
) {}
handleInput(data: string) {
if (matchesKey(data, Key.up) && this.selected > 0) this.selected--;
else if (matchesKey(data, Key.down) && this.selected < this.items.length - 1) this.selected++;
else if (matchesKey(data, Key.enter)) { this.done(this.items[this.selected]); return; }
else if (matchesKey(data, Key.escape)) { this.done(null); return; }
else return;
this.invalidate();
this.tui.requestRender();
}
render(width: number): string[] {
if (this.cachedLines && this.cachedWidth === width) return this.cachedLines;
this.cachedLines = this.items.map((item, i) =>
truncateToWidth((i === this.selected ? "> " : " ") + item, width)
);
this.cachedWidth = width;
return this.cachedLines;
}
invalidate() { this.cachedWidth = undefined; this.cachedLines = undefined; }
}
const result = await ctx.ui.custom<string | null>((tui, theme, _kb, done) =>
new MyComponent(tui, theme, ["A", "B", "C"], done)
);
```
**Composing with built-in components:**
```typescript
const result = await ctx.ui.custom<string | null>((tui, theme, _kb, done) => {
const container = new Container();
container.addChild(new DynamicBorder((s: string) => theme.fg("accent", s)));
container.addChild(new Text(theme.fg("accent", theme.bold("Title")), 1, 0));
const selectList = new SelectList(items, 10, {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
});
selectList.onSelect = (item) => done(item.value);
selectList.onCancel = () => done(null);
container.addChild(selectList);
return {
render: (w) => container.render(w),
invalidate: () => container.invalidate(),
handleInput: (data) => { selectList.handleInput(data); tui.requestRender(); },
};
});
```
</custom_components>
<overlays>
Floating modals rendered on top of everything:
```typescript
const result = await ctx.ui.custom<string | null>(
(tui, theme, _kb, done) => new MyDialog({ onClose: done }),
{
overlay: true,
overlayOptions: {
anchor: "center", // 9 positions (see below)
width: "50%", // number = columns, string = percentage
minWidth: 40,
maxHeight: "80%",
margin: 2, // All sides, or { top, right, bottom, left }
offsetX: 0, offsetY: 0, // Fine-tune position
visible: (w, h) => w >= 80, // Hide on narrow terminals
},
onHandle: (handle) => {
// handle.setHidden(true/false) — temporarily hide
// handle.hide() — permanently remove
},
}
);
```
**Anchor positions:**
```
top-left top-center top-right
left-center center right-center
bottom-left bottom-center bottom-right
```
**Stacked overlays:** Multiple overlays stack (newest on top). Closing one gives focus to the one below.
**⚠️ Overlay lifecycle:** Components are disposed when closed. Never reuse references — create fresh instances each time.
</overlays>
<custom_editor>
Replace the main input editor permanently:
```typescript
import { CustomEditor } from "@singularity-forge/pi-coding-agent";
class VimEditor extends CustomEditor {
private mode: "normal" | "insert" = "insert";
handleInput(data: string): void {
if (matchesKey(data, "escape") && this.mode === "insert") {
this.mode = "normal"; return;
}
if (this.mode === "insert") { super.handleInput(data); return; }
switch (data) {
case "i": this.mode = "insert"; return;
case "h": super.handleInput("\x1b[D"); return; // Left
case "j": super.handleInput("\x1b[B"); return; // Down
case "k": super.handleInput("\x1b[A"); return; // Up
case "l": super.handleInput("\x1b[C"); return; // Right
}
if (data.length === 1 && data.charCodeAt(0) >= 32) return; // Block printable in normal
super.handleInput(data);
}
}
ctx.ui.setEditorComponent((_tui, theme, keybindings) => new VimEditor(theme, keybindings));
ctx.ui.setEditorComponent(undefined); // Restore default
```
**Critical:** Extend `CustomEditor` (NOT `Editor`) to get app keybindings (escape to abort, ctrl+d, model switching).
</custom_editor>
<built_in_components>
**From `@singularity-forge/pi-tui`:**
| Component | Constructor | Purpose |
|-----------|-------------|---------|
| `Text` | `new Text(content, paddingX, paddingY, bgFn?)` | Multi-line text with word wrap |
| `Box` | `new Box(paddingX, paddingY, bgFn)` | Container with padding+background, `.addChild()` |
| `Container` | `new Container()` | Vertical stack, `.addChild()`, `.removeChild()`, `.clear()` |
| `Spacer` | `new Spacer(lines)` | Empty vertical space |
| `Markdown` | `new Markdown(content, padX, padY, getMarkdownTheme())` | Rendered markdown with syntax highlighting |
| `Image` | `new Image(base64, mimeType, theme, opts?)` | Image rendering (Kitty, iTerm2) |
| `SelectList` | `new SelectList(items, maxVisible, themeOpts)` | Interactive selection with search and scrolling |
| `SettingsList` | `new SettingsList(items, maxVisible, theme, onChange, onClose, opts?)` | Toggle settings with left/right arrows |
| `Input` | `new Input()` | Text input field |
| `Editor` | `new Editor(tui, editorTheme)` | Multi-line editor with undo |
**SelectList usage:**
```typescript
const items: SelectItem[] = [
{ value: "opt1", label: "Option 1", description: "First option" },
{ value: "opt2", label: "Option 2" },
];
const selectList = new SelectList(items, 10, {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
});
selectList.onSelect = (item) => { /* item.value */ };
selectList.onCancel = () => { /* escape pressed */ };
```
**SettingsList usage:**
```typescript
const items: SettingItem[] = [
{ id: "verbose", label: "Verbose mode", currentValue: "off", values: ["on", "off"] },
{ id: "theme", label: "Theme", currentValue: "dark", values: ["dark", "light", "auto"] },
];
const settings = new SettingsList(items, 15, getSettingsListTheme(),
(id, newValue) => { /* setting changed */ },
() => { /* close requested */ },
{ enableSearch: true },
);
```
**From `@singularity-forge/pi-coding-agent`:**
| Component | Constructor | Purpose |
|-----------|-------------|---------|
| `DynamicBorder` | `new DynamicBorder((s: string) => theme.fg("accent", s))` | Border line |
| `BorderedLoader` | — | Spinner with cancel support |
| `CustomEditor` | `new CustomEditor(theme, keybindings)` | Base class for custom editors |
</built_in_components>
<keyboard_input>
```typescript
import { matchesKey, Key } from "@singularity-forge/pi-tui";
handleInput(data: string) {
// Basic keys
if (matchesKey(data, Key.up)) {}
if (matchesKey(data, Key.down)) {}
if (matchesKey(data, Key.enter)) {}
if (matchesKey(data, Key.escape)) {}
if (matchesKey(data, Key.tab)) {}
if (matchesKey(data, Key.space)) {}
if (matchesKey(data, Key.backspace)) {}
if (matchesKey(data, Key.home)) {}
if (matchesKey(data, Key.end)) {}
// With modifiers
if (matchesKey(data, Key.ctrl("c"))) {}
if (matchesKey(data, Key.shift("tab"))) {}
if (matchesKey(data, Key.alt("left"))) {}
if (matchesKey(data, Key.ctrlShift("p"))) {}
// String format also works: "enter", "ctrl+c", "shift+tab"
// Printable character detection
if (data.length === 1 && data.charCodeAt(0) >= 32) {
// Letter, number, symbol
}
}
```
**handleInput contract:**
1. Check for your keys
2. Update state
3. Call `this.invalidate()` if render output changes
4. Call `tui.requestRender()` to trigger re-render
</keyboard_input>
<line_width_rule>
**Cardinal rule: each line from render() must not exceed `width` visible characters.**
```typescript
import { visibleWidth, truncateToWidth, wrapTextWithAnsi } from "@singularity-forge/pi-tui";
visibleWidth("\x1b[32mHello\x1b[0m"); // Returns 5 (ignores ANSI codes)
truncateToWidth("Very long text here", 10); // "Very lo..."
truncateToWidth("Very long text here", 10, ""); // "Very long " (no ellipsis)
wrapTextWithAnsi("\x1b[32mLong green text\x1b[0m", 10); // Word wrap preserving ANSI
```
If lines exceed `width`, terminal wraps cause visual corruption.
</line_width_rule>
<performance_caching>
Always cache render output:
```typescript
class CachedComponent {
private cachedWidth?: number;
private cachedLines?: string[];
render(width: number): string[] {
if (this.cachedLines && this.cachedWidth === width) return this.cachedLines;
const lines = this.computeLines(width);
this.cachedWidth = width;
this.cachedLines = lines;
return lines;
}
invalidate() { this.cachedWidth = undefined; this.cachedLines = undefined; }
}
```
**Update cycle:** State changes → `invalidate()``tui.requestRender()``render(width)` called
**Game loop pattern** (real-time updates):
```typescript
this.interval = setInterval(() => {
this.tick();
this.version++;
this.tui.requestRender();
}, 100); // 10 FPS
// Clean up in dispose()
clearInterval(this.interval);
```
</performance_caching>
<theme_colors>
Always use theme from callback params, never import directly.
**All foreground colors:**
| Category | Colors |
|----------|--------|
| General | `text`, `accent`, `muted`, `dim` |
| Status | `success`, `error`, `warning` |
| Borders | `border`, `borderAccent`, `borderMuted` |
| Messages | `userMessageText`, `customMessageText`, `customMessageLabel` |
| Tools | `toolTitle`, `toolOutput` |
| Diffs | `toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext` |
| Markdown | `mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet` |
| Syntax | `syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation` |
| Thinking | `thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh` |
**All background colors:** `selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`
**Syntax highlighting:**
```typescript
import { highlightCode, getLanguageFromPath } from "@singularity-forge/pi-coding-agent";
const lang = getLanguageFromPath("/file.rs"); // "rust"
const highlighted = highlightCode(code, lang, theme);
```
</theme_colors>
<common_mistakes>
1. **Lines exceed width** → Visual corruption. Use `truncateToWidth()` on every line.
2. **Forgetting `tui.requestRender()`** → UI doesn't update. Call after invalidate().
3. **Importing theme directly** → Wrong colors after theme switch. Use theme from callback.
4. **Not typing DynamicBorder param**`new DynamicBorder((s: string) => theme.fg("accent", s))`.
5. **Reusing disposed overlay components** → Create fresh instances each time.
6. **Styles bleeding across lines** → TUI resets per line. Reapply styles, or use `wrapTextWithAnsi()`.
7. **Not implementing invalidate()** → Theme changes don't take effect.
8. **Forgetting super.invalidate()**`override invalidate() { super.invalidate(); /* cleanup */ }`
9. **Timer not cleaned up** → Call `clearInterval` before `done()`.
10. **Using ctx.ui in non-interactive mode** → Check `ctx.hasUI` first.
</common_mistakes>

View file

@ -0,0 +1,126 @@
<overview>
Complete event reference with handler signatures, return types, and type narrowing utilities.
</overview>
<event_categories>
**Session events:** `session_start`, `session_before_switch`, `session_switch`, `session_before_fork`, `session_fork`, `session_before_compact`, `session_compact`, `session_before_tree`, `session_tree`, `session_shutdown`
**Agent events:** `before_agent_start`, `agent_start`, `agent_end`, `turn_start`, `turn_end`, `context`, `before_provider_request`, `message_start`, `message_update`, `message_end`
**Tool events:** `tool_call`, `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `tool_result`
**Input events:** `input`
**Model events:** `model_select`
**User bash events:** `user_bash`
**Special:** `session_directory` (CLI startup only, no `ctx` — receives only event)
</event_categories>
<handler_signature>
```typescript
pi.on("event_name", async (event, ctx: ExtensionContext) => {
// event — typed payload for this event
// ctx — access to UI, session, model, control flow
// Return undefined for no action, or a typed response
});
```
</handler_signature>
<key_events>
**before_agent_start** — Fired after user prompt, before agent loop. Primary hook for context injection and system prompt modification.
```typescript
pi.on("before_agent_start", async (event, ctx) => {
// event.prompt — user's prompt text
// event.images — attached images
// event.systemPrompt — current system prompt
return {
message: { customType: "my-ext", content: "Extra context", display: true },
systemPrompt: event.systemPrompt + "\n\nExtra instructions...",
};
});
```
**tool_call** — Fired before tool executes. Can block.
```typescript
import { isToolCallEventType } from "@singularity-forge/pi-coding-agent";
pi.on("tool_call", async (event, ctx) => {
if (isToolCallEventType("bash", event)) {
// event.input is typed as { command: string; timeout?: number }
if (event.input.command.includes("rm -rf")) {
return { block: true, reason: "Dangerous command" };
}
}
});
```
**tool_result** — Fired after tool executes. Can modify result. Handlers chain like middleware.
```typescript
import { isToolResultEventType } from "@singularity-forge/pi-coding-agent";
pi.on("tool_result", async (event, ctx) => {
if (isToolResultEventType("bash", event)) {
// event.details is typed as BashToolDetails
}
// Return partial patch: { content, details, isError }
// Omitted fields keep current values
});
```
**context** — Fired before each LLM call. Modify messages non-destructively.
```typescript
pi.on("context", async (event, ctx) => {
// event.messages is a deep copy — safe to modify
const filtered = event.messages.filter(m => !shouldPrune(m));
return { messages: filtered };
});
```
**input** — Fired when user input is received, before skill/template expansion.
```typescript
pi.on("input", async (event, ctx) => {
// event.text — raw input
// event.source — "interactive", "rpc", or "extension"
if (event.text.startsWith("?quick "))
return { action: "transform", text: `Respond briefly: ${event.text.slice(7)}` };
return { action: "continue" };
});
```
**model_select** — Fired when model changes.
```typescript
pi.on("model_select", async (event, ctx) => {
// event.model, event.previousModel, event.source ("set"|"cycle"|"restore")
});
```
</key_events>
<type_narrowing>
Built-in type guards for tool events:
```typescript
import { isToolCallEventType, isToolResultEventType } from "@singularity-forge/pi-coding-agent";
// Tool calls — narrows event.input type
if (isToolCallEventType("bash", event)) { /* event.input: { command, timeout? } */ }
if (isToolCallEventType("read", event)) { /* event.input: { path, offset?, limit? } */ }
if (isToolCallEventType("write", event)) { /* event.input: { path, content } */ }
if (isToolCallEventType("edit", event)) { /* event.input: { path, oldText, newText } */ }
// Tool results — narrows event.details type
if (isToolResultEventType("bash", event)) { /* event.details: BashToolDetails */ }
```
For custom tools, export your input type and use explicit type params:
```typescript
if (isToolCallEventType<"my_tool", MyToolInput>("my_tool", event)) {
event.input.action; // typed
}
```
</type_narrowing>

View file

@ -0,0 +1,64 @@
<overview>
The extension lifecycle from load to shutdown, including the full event flow.
</overview>
<loading>
Extensions load when SF starts (or on `/reload`). The default export function runs synchronously — subscribe to events and register tools/commands during this call.
```
SF starts
└─► Extension default function runs
├── pi.on("event", handler) ← Subscribe
├── pi.registerTool({...}) ← Register tools
├── pi.registerCommand(...) ← Register commands
└── pi.registerShortcut(...) ← Register shortcuts
└─► session_start fires
```
</loading>
<event_flow>
Full event flow per user prompt:
```
user sends prompt
├─► Extension commands checked (bypass if match)
├─► input event (can intercept/transform/handle)
├─► Skill/template expansion
├─► before_agent_start (inject message, modify system prompt)
├─► agent_start
│ ┌── Turn loop (repeats while LLM calls tools) ──┐
│ │ turn_start │
│ │ context (can modify messages sent to LLM) │
│ │ before_provider_request (inspect/replace payload)│
│ │ LLM responds → may call tools: │
│ │ tool_call (can BLOCK) │
│ │ tool_execution_start/update/end │
│ │ tool_result (can MODIFY) │
│ │ turn_end │
│ └────────────────────────────────────────────────┘
└─► agent_end
```
</event_flow>
<session_events>
| Event | When | Can Return |
|-------|------|------------|
| `session_start` | Session loads | — |
| `session_before_switch` | Before `/new` or `/resume` | `{ cancel: true }` |
| `session_switch` | After switch | — |
| `session_before_fork` | Before `/fork` | `{ cancel: true }`, `{ skipConversationRestore: true }` |
| `session_fork` | After fork | — |
| `session_before_compact` | Before compaction | `{ cancel: true }`, `{ compaction: {...} }` |
| `session_compact` | After compaction | — |
| `session_shutdown` | On exit | — |
</session_events>
<hot_reload>
Extensions in auto-discovered locations hot-reload with `/reload`:
- `session_shutdown` fires for old runtime
- Resources re-scanned
- `session_start` fires for new runtime
- Code after `await ctx.reload()` still runs from the pre-reload version — treat as terminal
</hot_reload>

View file

@ -0,0 +1,75 @@
<overview>
ExtensionAPI methods — the `pi` object received in the default export function.
</overview>
<core_registration>
| Method | Purpose |
|--------|---------|
| `pi.on(event, handler)` | Subscribe to events |
| `pi.registerTool(definition)` | Register LLM-callable tool |
| `pi.registerCommand(name, options)` | Register `/command` |
| `pi.registerShortcut(key, options)` | Register keyboard shortcut |
| `pi.registerFlag(name, options)` | Register CLI flag |
| `pi.registerMessageRenderer(customType, renderer)` | Custom message rendering |
| `pi.registerProvider(name, config)` | Register/override model provider |
| `pi.unregisterProvider(name)` | Remove a provider |
</core_registration>
<messaging>
| Method | Purpose |
|--------|---------|
| `pi.sendMessage(message, options?)` | Inject custom message into session |
| `pi.sendUserMessage(content, options?)` | Send user message (triggers turn) |
**Delivery modes for `sendMessage`:**
- `"steer"` (default) — Interrupts streaming after current tool
- `"followUp"` — Waits for agent to finish all tools
- `"nextTurn"` — Queued for next user prompt
```typescript
pi.sendMessage({
customType: "my-extension",
content: "Additional context",
display: true,
details: { ... },
}, { deliverAs: "steer", triggerTurn: true });
```
</messaging>
<state_session>
| Method | Purpose |
|--------|---------|
| `pi.appendEntry(customType, data?)` | Persist state (NOT sent to LLM) |
| `pi.setSessionName(name)` | Set session display name |
| `pi.getSessionName()` | Get session name |
| `pi.setLabel(entryId, label)` | Bookmark entry for `/tree` |
</state_session>
<tool_management>
```typescript
const active = pi.getActiveTools(); // ["read", "bash", "edit", "write"]
const all = pi.getAllTools(); // [{ name, description }, ...]
pi.setActiveTools(["read", "bash"]); // Enable/disable tools
```
</tool_management>
<model_management>
```typescript
const model = ctx.modelRegistry.find("anthropic", "claude-sonnet-4-5");
if (model) {
const success = await pi.setModel(model); // Returns false if no API key
}
pi.getThinkingLevel(); // "off" | "minimal" | "low" | "medium" | "high" | "xhigh"
pi.setThinkingLevel("high");
```
</model_management>
<utilities>
| Method | Purpose |
|--------|---------|
| `pi.exec(cmd, args, opts?)` | Shell command (prefer over child_process) |
| `pi.events` | Shared event bus for inter-extension communication |
| `pi.getFlag(name)` | Get CLI flag value |
| `pi.getCommands()` | All available slash commands |
</utilities>

View file

@ -0,0 +1,53 @@
<overview>
ExtensionContext (`ctx`) — available in all event handlers (except `session_directory`).
</overview>
<ui_methods>
**Dialogs (blocking — wait for user response):**
```typescript
const choice = await ctx.ui.select("Pick one:", ["A", "B", "C"]);
const ok = await ctx.ui.confirm("Delete?", "This cannot be undone");
const name = await ctx.ui.input("Name:", "placeholder");
const text = await ctx.ui.editor("Edit:", "prefilled text");
// Timed dialog — auto-dismiss after timeout
const ok = await ctx.ui.confirm("Auto-confirm?", "Proceeds in 5s", { timeout: 5000 });
```
**Non-blocking UI:**
```typescript
ctx.ui.notify("Done!", "info"); // Toast: "info" | "warning" | "error"
ctx.ui.setStatus("my-ext", "● Active"); // Footer status
ctx.ui.setStatus("my-ext", undefined); // Clear
ctx.ui.setWidget("my-id", ["Line 1", "Line 2"]); // Widget above editor
ctx.ui.setWidget("my-id", ["Below!"], { placement: "belowEditor" });
ctx.ui.setTitle("sf - my project"); // Terminal title
ctx.ui.setEditorText("Prefill"); // Set editor content
ctx.ui.setWorkingMessage("Analyzing..."); // Working message during streaming
ctx.ui.setToolsExpanded(true); // Expand tool output
```
</ui_methods>
<ctx_properties>
| Property/Method | Purpose |
|----------------|---------|
| `ctx.hasUI` | `false` in print/JSON mode — check before dialogs |
| `ctx.cwd` | Current working directory |
| `ctx.sessionManager` | Read-only session state |
| `ctx.modelRegistry` / `ctx.model` | Model access |
| `ctx.isIdle()` / `ctx.abort()` / `ctx.hasPendingMessages()` | Agent state |
| `ctx.shutdown()` | Request graceful exit (deferred until idle) |
| `ctx.getContextUsage()` | Current context token usage |
| `ctx.compact(options?)` | Trigger compaction |
| `ctx.getSystemPrompt()` | Current effective system prompt |
</ctx_properties>
<session_manager>
```typescript
ctx.sessionManager.getEntries() // All entries
ctx.sessionManager.getBranch() // Current branch
ctx.sessionManager.getLeafId() // Current leaf entry ID
ctx.sessionManager.getSessionFile() // Session JSONL path
ctx.sessionManager.getLabel(entryId) // Entry label
```
</session_manager>

View file

@ -0,0 +1,37 @@
<overview>
Non-negotiable rules and common gotchas when building SF extensions.
</overview>
<must_follow>
1. **Use `StringEnum` for string enums**`Type.Union`/`Type.Literal` breaks Google's API.
2. **Truncate tool output** — Large output causes context overflow, compaction failures, degraded performance. Limit: 50KB / 2000 lines.
3. **Use theme from callback** — Don't import theme directly. Use the `theme` parameter from `ctx.ui.custom()` or render functions.
4. **`DynamicBorder` color param** — Type as `(s: string) => theme.fg("accent", s)`.
5. **Call `tui.requestRender()` after state changes** in `handleInput`.
6. **Return `{ render, invalidate, handleInput }`** from custom components.
7. **Lines must not exceed `width`** in `render()` — use `truncateToWidth()`.
8. **Session control methods ONLY in commands**`waitForIdle()`, `newSession()`, `fork()`, `navigateTree()`, `reload()` will **deadlock** in event handlers.
9. **Strip leading `@` from path arguments** — some models add it.
10. **Store state in tool result `details`** for proper branching support.
</must_follow>
<common_patterns>
- Rebuild component on `invalidate()` when pre-baking theme colors
- Check `signal?.aborted` in long-running tool executions
- Use `pi.exec()` instead of `child_process` for shell commands
- Overlay components are **disposed when closed** — create fresh instances each time
- Treat `ctx.reload()` as terminal — code after runs from pre-reload version
- Check `ctx.hasUI` before dialog methods (false in print/JSON mode)
- Extension errors are logged but don't crash SF — tool_call handler errors fail-safe (block the tool)
</common_patterns>
<_sf_paths>
**SF extension paths (community/user-installed extensions):**
- Global: `~/.sf/extensions/*.ts`
- Global (subdir): `~/.sf/extensions/*/index.ts`
- Project-local: `.sf/extensions/*.ts`
- Project-local (subdir): `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package.
Community extensions placed there are silently ignored by the loader.
</_sf_paths>

View file

@ -0,0 +1,32 @@
<overview>
Mode behavior determines which UI methods work. Extensions may run in non-interactive modes where dialogs are unavailable.
</overview>
<mode_table>
| Mode | UI Methods | Notes |
|------|-----------|-------|
| **Interactive** (default) | Full TUI | Normal operation — all UI works |
| **RPC** (`--mode rpc`) | JSON protocol | Host handles UI, dialogs work via sub-protocol |
| **JSON** (`--mode json`) | No-op | Event stream to stdout, no UI |
| **Print** (`-p`) | No-op | Extensions run but can't prompt users |
</mode_table>
<checking_ui>
**Always check `ctx.hasUI`** before calling dialog methods:
```typescript
if (ctx.hasUI) {
const ok = await ctx.ui.confirm("Delete?", "Sure?");
if (!ok) return;
} else {
// Default behavior for non-interactive mode
// Or just proceed without confirmation
}
```
`ctx.hasUI` is `false` in print mode (`-p`) and JSON mode. `true` in interactive and RPC mode.
</checking_ui>
<fire_and_forget>
Non-blocking methods (`notify`, `setStatus`, `setWidget`, `setTitle`, `setEditorText`) are safe in all modes — they're no-ops when no UI is available.
</fire_and_forget>

View file

@ -0,0 +1,89 @@
<overview>
Model and provider management — switching models, registering custom providers with OAuth, and reacting to model changes.
</overview>
<switching_models>
```typescript
const model = ctx.modelRegistry.find("anthropic", "claude-sonnet-4-5");
if (model) {
const success = await pi.setModel(model);
if (!success) ctx.ui.notify("No API key for this model", "error");
}
// Thinking level
pi.getThinkingLevel(); // "off" | "minimal" | "low" | "medium" | "high" | "xhigh"
pi.setThinkingLevel("high"); // Clamped to model capabilities
```
</switching_models>
<register_provider>
```typescript
pi.registerProvider("my-proxy", {
baseUrl: "https://proxy.example.com",
apiKey: "PROXY_API_KEY", // Env var name or literal
api: "anthropic-messages", // or "openai-completions", "openai-responses"
headers: { "X-Custom": "value" }, // Optional custom headers
authHeader: true, // Auto-add Authorization: Bearer header
models: [
{
id: "claude-sonnet-4-20250514",
name: "Claude 4 Sonnet (proxy)",
reasoning: false,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
maxTokens: 16384,
}
],
});
// Override just baseUrl for an existing provider (keeps all models)
pi.registerProvider("anthropic", {
baseUrl: "https://proxy.example.com",
});
// Remove a provider (restores any overridden built-in models)
pi.unregisterProvider("my-proxy");
```
Takes effect immediately after initial load phase — no `/reload` required.
</register_provider>
<oauth_provider>
Register a provider with OAuth support for `/login`:
```typescript
pi.registerProvider("corporate-ai", {
baseUrl: "https://ai.corp.com",
api: "openai-responses",
models: [/* ... */],
oauth: {
name: "Corporate AI (SSO)",
async login(callbacks) {
callbacks.onAuth({ url: "https://sso.corp.com/..." });
const code = await callbacks.onPrompt({ message: "Enter code:" });
return { refresh: code, access: code, expires: Date.now() + 3600000 };
},
async refreshToken(credentials) {
return credentials; // Refresh logic
},
getApiKey(credentials) {
return credentials.access;
},
},
});
```
</oauth_provider>
<model_events>
React to model changes:
```typescript
pi.on("model_select", async (event, ctx) => {
// event.model — newly selected model
// event.previousModel — previous model (undefined if first)
// event.source — "set" | "cycle" | "restore"
ctx.ui.setStatus("model", `${event.model.provider}/${event.model.id}`);
});
```
</model_events>

View file

@ -0,0 +1,55 @@
<overview>
Packaging extensions for distribution via npm, git, or local paths. Creating SF packages.
</overview>
<package_manifest>
Add a `pi` manifest to `package.json`:
```json
{
"name": "my-sf-package",
"keywords": ["pi-package"],
"pi": {
"extensions": ["./extensions"],
"skills": ["./skills"],
"prompts": ["./prompts"],
"themes": ["./themes"]
}
}
```
</package_manifest>
<installing>
```bash
sf install npm:@foo/bar@1.0.0
sf install git:github.com/user/repo@v1
sf install ./local/path
# Try without installing:
sf -e npm:@foo/bar
```
</installing>
<convention_directories>
If no `pi` manifest exists, auto-discovers:
- `extensions/``.ts` and `.js` files
- `skills/``SKILL.md` folders
- `prompts/``.md` files
- `themes/``.json` files
</convention_directories>
<dependencies>
- List `@singularity-forge/pi-ai`, `@singularity-forge/pi-coding-agent`, `@singularity-forge/pi-tui`, `@sinclair/typebox` in `peerDependencies` with `"*"` — they're bundled by the runtime.
- Other npm deps go in `dependencies`. The runtime runs `npm install` on package installation.
</dependencies>
<gallery_metadata>
```json
{
"pi": {
"video": "https://example.com/demo.mp4",
"image": "https://example.com/screenshot.png"
}
}
```
</gallery_metadata>

View file

@ -0,0 +1,90 @@
<overview>
Remote execution via pluggable operations, spawnHook for bash, and tool override patterns.
</overview>
<pluggable_operations>
Built-in tools support pluggable operations for SSH, containers, etc.:
```typescript
import { createReadTool, createBashTool, createWriteTool } from "@singularity-forge/pi-coding-agent";
// Create tool with custom remote operations
const remoteBash = createBashTool(cwd, {
operations: {
execute: (cmd) => sshExec(remote, cmd),
},
});
```
**Operations interfaces:** `ReadOperations`, `WriteOperations`, `EditOperations`, `BashOperations`, `LsOperations`, `GrepOperations`, `FindOperations`
</pluggable_operations>
<spawn_hook>
The bash tool supports a `spawnHook` to modify commands before execution:
```typescript
const bashTool = createBashTool(cwd, {
spawnHook: ({ command, cwd, env }) => ({
command: `source ~/.profile\n${command}`,
cwd: `/mnt/sandbox${cwd}`,
env: { ...env, CI: "1" },
}),
});
```
</spawn_hook>
<ssh_pattern>
Full SSH pattern with flag-based switching:
```typescript
import { createBashTool, type ExtensionAPI } from "@singularity-forge/pi-coding-agent";
export default function (pi: ExtensionAPI) {
pi.registerFlag("ssh", { description: "SSH target", type: "string" });
const localBash = createBashTool(process.cwd());
pi.registerTool({
...localBash,
async execute(id, params, signal, onUpdate, ctx) {
const sshTarget = pi.getFlag("--ssh");
if (sshTarget) {
const remoteBash = createBashTool(process.cwd(), {
operations: createSSHOperations(sshTarget),
});
return remoteBash.execute(id, params, signal, onUpdate);
}
return localBash.execute(id, params, signal, onUpdate);
},
});
}
```
</ssh_pattern>
<tool_override_pattern>
Override built-in tools for logging/access control — omit renderCall/renderResult to keep built-in rendering:
```typescript
import { createReadTool } from "@singularity-forge/pi-coding-agent";
import { Type } from "@sinclair/typebox";
pi.registerTool({
name: "read", // Same name = overrides built-in
label: "Read (Logged)",
description: "Read file contents with logging",
parameters: Type.Object({
path: Type.String(),
offset: Type.Optional(Type.Number()),
limit: Type.Optional(Type.Number()),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
console.log(`[AUDIT] Reading: ${params.path}`);
const builtIn = createReadTool(ctx.cwd);
return builtIn.execute(toolCallId, params, signal, onUpdate);
},
// Omit renderCall/renderResult → built-in renderer used automatically
});
```
**Must match exact result shape** including `details` type.
</tool_override_pattern>

View file

@ -0,0 +1,70 @@
<overview>
State management patterns for extensions — tool result details (branch-safe) and appendEntry (private).
</overview>
<tool_result_details>
**Recommended for stateful tools.** State in `details` works correctly with branching/forking.
```typescript
export default function (pi: ExtensionAPI) {
let items: string[] = [];
// Reconstruct state from session on load
pi.on("session_start", async (_event, ctx) => reconstructState(ctx));
pi.on("session_switch", async (_event, ctx) => reconstructState(ctx));
pi.on("session_fork", async (_event, ctx) => reconstructState(ctx));
pi.on("session_tree", async (_event, ctx) => reconstructState(ctx));
const reconstructState = (ctx: ExtensionContext) => {
items = [];
for (const entry of ctx.sessionManager.getBranch()) {
if (entry.type === "message" && entry.message.role === "toolResult") {
if (entry.message.toolName === "my_tool") {
items = entry.message.details?.items ?? [];
}
}
}
};
pi.registerTool({
name: "my_tool",
// ...
async execute(toolCallId, params, signal, onUpdate, ctx) {
items.push(params.text);
return {
content: [{ type: "text", text: "Added" }],
details: { items: [...items] }, // ← Snapshot full state
};
},
});
}
```
**Key:** Reconstruct on ALL session change events: `session_start`, `session_switch`, `session_fork`, `session_tree`.
</tool_result_details>
<append_entry>
**For extension-private state** that doesn't participate in LLM context but needs to survive restarts:
```typescript
// Save
pi.appendEntry("my-state", { count: 42, lastRun: Date.now() });
// Restore
pi.on("session_start", async (_event, ctx) => {
for (const entry of ctx.sessionManager.getEntries()) {
if (entry.type === "custom" && entry.customType === "my-state") {
const data = entry.data; // { count: 42, lastRun: ... }
}
}
});
```
</append_entry>
<when_to_use_which>
| Pattern | Use When |
|---------|----------|
| Tool result `details` | State the LLM's tools produce (todo items, connection state, query results) |
| `pi.appendEntry()` | Extension-private config, timestamps, counters the LLM doesn't need |
| File on disk | Large data, config files, caches that shouldn't be in session |
</when_to_use_which>

View file

@ -0,0 +1,52 @@
<overview>
System prompt modification — per-turn injection, context manipulation, and tool-specific prompt content.
</overview>
<per_turn_modification>
Use `before_agent_start` to inject messages and/or modify the system prompt for each turn:
```typescript
pi.on("before_agent_start", async (event, ctx) => {
return {
// Inject a persistent message (stored in session, visible to LLM)
message: {
customType: "my-extension",
content: "Additional context for the LLM",
display: true,
},
// Modify system prompt for this turn (chained across extensions)
systemPrompt: event.systemPrompt + "\n\nYou must respond only in haiku.",
};
});
```
</per_turn_modification>
<context_manipulation>
Use the `context` event to modify messages before each LLM call:
```typescript
pi.on("context", async (event, ctx) => {
// event.messages is a deep copy — safe to modify
const filtered = event.messages.filter(m => !isIrrelevant(m));
return { messages: filtered };
});
```
</context_manipulation>
<tool_specific_prompts>
Tools can add content to the system prompt when active:
```typescript
pi.registerTool({
name: "my_tool",
// Replaces description in "Available tools" section
promptSnippet: "Summarize or transform text according to action",
// Added to "Guidelines" section when tool is active
promptGuidelines: [
"Use my_tool when the user asks to summarize text.",
"Prefer my_tool over direct output for structured data."
],
// ...
});
```
</tool_specific_prompts>

View file

@ -0,0 +1,51 @@
/**
* {{EXTENSION_NAME}} {{DESCRIPTION}}
*
* Capabilities:
* {{CAPABILITIES_LIST}}
*/
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
export default function (pi: ExtensionAPI) {
  // --- Lifecycle events ---
  pi.on("session_start", async (_event, ctx) => {
    // Initialize state, restore from session, show status
  });

  // --- LLM-callable tools ---
  // Parameter schema: StringEnum (not Type.Union/Type.Literal) keeps the
  // schema compatible across providers.
  const parameters = Type.Object({
    action: StringEnum(["list", "add"] as const),
    text: Type.Optional(Type.String({ description: "Item text" })),
  });
  pi.registerTool({
    name: "{{tool_name}}",
    label: "{{Tool Label}}",
    description: "{{Tool description for LLM}}",
    parameters,
    async execute(toolCallId, params, signal, onUpdate, ctx) {
      // Respect cancellation before doing any work.
      if (signal?.aborted) {
        return { content: [{ type: "text", text: "Cancelled" }] };
      }
      // Do work here
      return {
        content: [{ type: "text", text: "Result for LLM" }],
        details: {},
      };
    },
  });

  // --- Slash commands ---
  pi.registerCommand("{{command_name}}", {
    description: "{{Command description}}",
    handler: async (args, ctx) => {
      ctx.ui.notify(`Running ${args}`, "info");
    },
  });
}

View file

@ -0,0 +1,143 @@
/**
* {{EXTENSION_NAME}} Stateful tool with persistence
*
* State is stored in tool result details for proper branching support.
*/
import type { ExtensionAPI, ExtensionContext } from "@singularity-forge/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
import { Text, truncateToWidth, matchesKey, Key } from "@singularity-forge/pi-tui";
// Template placeholders ({{ItemType}}, {{tool_name}}, ...) are substituted
// when the skeleton is instantiated; this file is not compiled as-is.

// Domain object tracked by the tool — one entry per stored item.
interface {{ItemType}} {
  id: number;
  // Add fields
}

// Snapshot stored in every tool result's `details`. Carrying the FULL
// state (not a delta) is what makes branch/fork replay work.
interface {{ToolDetails}} {
  action: string;
  items: {{ItemType}}[];
  nextId: number;
  error?: string;
}

export default function (pi: ExtensionAPI) {
  // In-memory working copy; the authoritative state lives in tool result
  // details and is rebuilt by reconstructState() on session changes.
  let items: {{ItemType}}[] = [];
  let nextId = 1;
  // Reconstruct state from session: replay the current branch and adopt
  // the snapshot from each {{tool_name}} tool result (last one wins).
  const reconstructState = (ctx: ExtensionContext) => {
    items = [];
    nextId = 1;
    for (const entry of ctx.sessionManager.getBranch()) {
      if (entry.type === "message" && entry.message.role === "toolResult") {
        if (entry.message.toolName === "{{tool_name}}") {
          const details = entry.message.details as {{ToolDetails}} | undefined;
          if (details) {
            items = details.items;
            nextId = details.nextId;
          }
        }
      }
    }
  };
  // Reconstruct on ALL session change events
  pi.on("session_start", async (_event, ctx) => reconstructState(ctx));
  pi.on("session_switch", async (_event, ctx) => reconstructState(ctx));
  pi.on("session_fork", async (_event, ctx) => reconstructState(ctx));
  pi.on("session_tree", async (_event, ctx) => reconstructState(ctx));
  // Register the tool
  pi.registerTool({
    name: "{{tool_name}}",
    label: "{{Tool Label}}",
    description: "{{Description for LLM}}",
    parameters: Type.Object({
      action: StringEnum(["list", "add", "remove"] as const),
      text: Type.Optional(Type.String({ description: "Item text" })),
      id: Type.Optional(Type.Number({ description: "Item ID" })),
    }),
    async execute(toolCallId, params, signal, onUpdate, ctx) {
      // Respect cancellation before doing any work.
      if (signal?.aborted) {
        return { content: [{ type: "text", text: "Cancelled" }] };
      }
      switch (params.action) {
        case "list":
          return {
            content: [{ type: "text", text: items.length ? JSON.stringify(items) : "No items" }],
            // [...items] snapshots the array so later mutations don't
            // alias the stored details.
            details: { action: "list", items: [...items], nextId } as {{ToolDetails}},
          };
        case "add": {
          if (!params.text) throw new Error("text required for add");
          const item: {{ItemType}} = { id: nextId++ /* , ... */ };
          items.push(item);
          return {
            content: [{ type: "text", text: `Added #${item.id}` }],
            details: { action: "add", items: [...items], nextId } as {{ToolDetails}},
          };
        }
        case "remove": {
          if (params.id === undefined) throw new Error("id required for remove");
          const idx = items.findIndex(i => i.id === params.id);
          if (idx === -1) throw new Error(`Item #${params.id} not found`);
          items.splice(idx, 1);
          return {
            content: [{ type: "text", text: `Removed #${params.id}` }],
            details: { action: "remove", items: [...items], nextId } as {{ToolDetails}},
          };
        }
        default:
          throw new Error(`Unknown action: ${params.action}`);
      }
    },
    // Custom rendering
    renderCall(args, theme) {
      let text = theme.fg("toolTitle", theme.bold("{{tool_name}} "));
      text += theme.fg("muted", args.action);
      return new Text(text, 0, 0);
    },
    renderResult(result, { expanded }, theme) {
      const details = result.details as {{ToolDetails}} | undefined;
      if (!details) return new Text("", 0, 0);
      if (details.error) return new Text(theme.fg("error", details.error), 0, 0);
      return new Text(theme.fg("success", `${details.action} (${details.items.length} items)`), 0, 0);
    },
  });
  // User command to view state
  pi.registerCommand("{{command_name}}", {
    description: "View {{items}}",
    handler: async (_args, ctx) => {
      // Custom UI needs an interactive terminal; bail out in print mode.
      if (!ctx.hasUI) {
        ctx.ui.notify("Requires interactive mode", "error");
        return;
      }
      await ctx.ui.custom<void>((_tui, theme, _kb, done) => ({
        render(width: number): string[] {
          const lines = [
            "",
            truncateToWidth(theme.fg("accent", ` {{Items}} (${items.length}) `), width),
            "",
          ];
          for (const item of items) {
            lines.push(truncateToWidth(` #${item.id}`, width));
          }
          lines.push("", truncateToWidth(theme.fg("dim", " Press Escape to close"), width), "");
          return lines;
        },
        handleInput(data: string) {
          if (matchesKey(data, Key.escape)) done();
        },
        invalidate() {},
      }));
    },
  });
}

View file

@ -0,0 +1,58 @@
// SF — Extension template import path validation
// Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import { readFileSync, readdirSync, statSync } from "node:fs";
import { join, dirname, relative } from "node:path";
import { fileURLToPath } from "node:url";
const __dirname = dirname(fileURLToPath(import.meta.url));
const skillRoot = join(__dirname, "..");
// Recursively collect every file path under `dir` (depth-first, in
// readdir order). Directories themselves are not included in the output.
function walk(dir: string): string[] {
  const out: string[] = [];
  // withFileTypes yields Dirent entries, avoiding a separate stat()
  // syscall per entry for plain files and directories.
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const full = join(dir, entry.name);
    // Dirent.isDirectory() does not follow symlinks, so fall back to
    // statSync for symlinks to preserve the follow-symlink behavior of
    // the per-entry statSync this replaces.
    const isDir =
      entry.isDirectory() || (entry.isSymbolicLink() && statSync(full).isDirectory());
    if (isDir) {
      out.push(...walk(full));
    } else {
      out.push(full);
    }
  }
  return out;
}
describe("extension templates use @singularity-forge/* imports", () => {
  const templates = ["extension-skeleton.ts", "stateful-tool-skeleton.ts"];
  // Load a sibling template file as UTF-8 text.
  const read = (name: string): string => readFileSync(join(__dirname, name), "utf-8");

  templates.forEach((template) => {
    it(`${template} uses @singularity-forge/pi-coding-agent (not @mariozechner)`, () => {
      const source = read(template);
      assert.ok(
        source.includes("@singularity-forge/pi-coding-agent"),
        `Expected @singularity-forge/pi-coding-agent import in ${template}`,
      );
      assert.ok(
        !source.includes("@mariozechner/"),
        `Found stale @mariozechner/ import in ${template}`,
      );
    });
  });

  it("extension-skeleton.ts uses @singularity-forge/pi-ai for StringEnum", () => {
    assert.ok(
      read("extension-skeleton.ts").includes("@singularity-forge/pi-ai"),
      "Expected @singularity-forge/pi-ai import",
    );
  });

  it("stateful-tool-skeleton.ts uses @singularity-forge/pi-tui", () => {
    assert.ok(
      read("stateful-tool-skeleton.ts").includes("@singularity-forge/pi-tui"),
      "Expected @singularity-forge/pi-tui import",
    );
  });

  it("no @mariozechner/ references anywhere in create-sf-extension/", () => {
    // This test file itself legitimately mentions the stale scope, so it
    // is excluded before scanning.
    const offenders = walk(skillRoot)
      .filter((file) => !file.endsWith("templates.test.ts"))
      .filter((file) => readFileSync(file, "utf-8").includes("@mariozechner/"))
      .map((file) => relative(skillRoot, file));
    assert.deepEqual(offenders, [], `Stale @mariozechner/ references found in: ${offenders.join(", ")}`);
  });
});

View file

@ -0,0 +1,57 @@
<required_reading>
Read the reference file for the specific capability being added:
- Tools → references/custom-tools.md
- Commands → references/custom-commands.md
- Events → references/events-reference.md
- UI → references/custom-ui.md
- Rendering → references/custom-rendering.md
- State → references/state-management.md
- System prompt → references/system-prompt-modification.md
</required_reading>
<process>
## Step 1: Identify the Extension
Locate the existing extension file. Check:
- `~/.sf/agent/extensions/` (global community extensions)
- `.sf/extensions/` (project-local)
Read the current extension code to understand its structure.
## Step 2: Add the Capability
Add the new registration/hook inside the existing `export default function (pi: ExtensionAPI)` body. Follow the patterns in the relevant reference file.
If the extension needs new imports, add them at the top of the file.
## Step 3: Handle Structural Changes
**Single file → Directory**: If the extension is outgrowing a single file:
1. Create `~/.sf/agent/extensions/my-extension/`
2. Move the file to `index.ts`
3. Extract helpers to separate files
**Adding npm dependencies**: If new packages are needed:
1. Create `package.json` in the extension directory
2. Add dependencies
3. Run `npm install`
4. Add `"pi": { "extensions": ["./index.ts"] }` to package.json
## Step 4: Test
```bash
/reload
```
Verify the new capability works alongside existing ones.
</process>
<success_criteria>
Capability addition is complete when:
- [ ] New capability added without breaking existing functionality
- [ ] All new imports resolve
- [ ] `/reload` succeeds
- [ ] New tool/command/hook tested with real invocation
</success_criteria>

View file

@ -0,0 +1,176 @@
<required_reading>
**Read these reference files before proceeding:**
1. references/extension-lifecycle.md
2. references/custom-tools.md (if building tools)
3. references/custom-commands.md (if building commands)
4. references/events-reference.md (if building event hooks)
5. references/key-rules-gotchas.md (always)
</required_reading>
<process>
## Step 1: Determine Scope and Placement
Ask the user:
- **Global** (`~/.sf/agent/extensions/`) — Available in all SF sessions
- **Project-local** (`.sf/extensions/`) — Available only in this project
## Step 2: Determine Extension Capabilities
Identify what the extension needs from the user's description:
| Capability | API | When |
|------------|-----|------|
| Custom tool (LLM-callable) | `pi.registerTool()` | LLM needs to perform new actions |
| Slash command | `pi.registerCommand()` | User needs direct actions |
| Event interception | `pi.on("event", ...)` | Block/modify tool calls, inject context, react to lifecycle |
| Custom UI | `ctx.ui.custom()` | Complex interactive displays |
| System prompt modification | `before_agent_start` event | Add per-turn instructions |
| Context filtering | `context` event | Modify messages sent to LLM |
| State persistence | `details` in tool results or `pi.appendEntry()` | Stateful behavior |
| Custom rendering | `renderCall` / `renderResult` | Control how tools appear in TUI |
| Provider management | `pi.registerProvider()` | Custom model endpoints |
| Keyboard shortcut | `pi.registerShortcut()` | Hotkey triggers |
## Step 3: Choose Extension Structure
**Directory with index.ts** — the standard pattern for all extensions:
```
~/.sf/agent/extensions/my-extension/
├── extension-manifest.json # Required — declares capabilities
├── index.ts # Entry point (must export default function)
├── tools.ts # Optional — tool implementations
└── utils.ts # Optional — shared utilities
```
**Package with dependencies** — when npm packages are needed:
```
~/.sf/agent/extensions/my-extension/
├── extension-manifest.json
├── package.json
├── src/index.ts
└── node_modules/
```
For packages, `package.json` needs:
```json
{
"name": "my-extension",
"dependencies": { ... },
"pi": { "extensions": ["./src/index.ts"] }
}
```
## Step 3b: Create the Extension Manifest
Every extension must include an `extension-manifest.json`:
```json
{
"id": "my-extension",
"name": "My Extension",
"version": "1.0.0",
"description": "What this extension does in one line",
"tier": "community",
"requires": { "platform": ">=2.29.0" },
"provides": {
"tools": ["my_tool"],
"commands": ["mycommand"],
"hooks": ["session_start"]
}
}
```
Only include non-empty arrays in `provides`. See `docs/extension-sdk/manifest-spec.md` for the full spec.
## Step 4: Write the Extension
Start with the skeleton:
```typescript
import type { ExtensionAPI } from "@singularity-forge/pi-coding-agent";
export default function (pi: ExtensionAPI) {
// Register events, tools, commands here
}
```
Then add capabilities based on Step 2. Reference the appropriate reference files for each capability.
**Tool registration pattern:**
```typescript
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
pi.registerTool({
name: "my_tool",
label: "My Tool",
description: "What this tool does (shown to LLM)",
parameters: Type.Object({
action: StringEnum(["list", "add"] as const),
text: Type.Optional(Type.String({ description: "Item text" })),
}),
async execute(toolCallId, params, signal, onUpdate, ctx) {
if (signal?.aborted) return { content: [{ type: "text", text: "Cancelled" }] };
return {
content: [{ type: "text", text: "Result for LLM" }],
details: { data: "for rendering and state" },
};
},
});
```
**Command registration pattern:**
```typescript
pi.registerCommand("mycommand", {
description: "What this command does",
handler: async (args, ctx) => {
ctx.ui.notify(`Running with args: ${args}`, "info");
},
});
```
**Event hook pattern:**
```typescript
pi.on("tool_call", async (event, ctx) => {
if (event.toolName === "bash" && event.input.command?.includes("rm -rf")) {
return { block: true, reason: "Blocked dangerous command" };
}
});
```
## Step 5: Test the Extension
```bash
# Quick test without installing
sf -e ./path/to/my-extension.ts
# Or place in extensions dir and reload
/reload
```
Verify:
- Extension loads without errors (check SF startup output)
- Tools appear when LLM is asked to use them
- Commands respond to `/mycommand`
- Event hooks trigger at expected points
## Step 6: Iterate
Fix issues, add features, refine. Use `/reload` for hot-reload during development.
</process>
<success_criteria>
Extension creation is complete when:
- [ ] Extension directory created with index.ts and extension-manifest.json
- [ ] Manifest `provides` accurately lists all registered tools, commands, hooks, shortcuts
- [ ] All imports resolve (TypeBox, pi-ai, pi-coding-agent, pi-tui as needed)
- [ ] Tools use `StringEnum` for string enums (not `Type.Union`/`Type.Literal`)
- [ ] Tool output is truncated if variable-length
- [ ] State stored in `details` if extension is stateful
- [ ] `ctx.hasUI` checked before dialog methods
- [ ] Extension loads on `/reload` without errors
- [ ] Tools callable by LLM, commands by user
- [ ] Tested with at least one real invocation
</success_criteria>

View file

@ -0,0 +1,76 @@
<required_reading>
1. references/key-rules-gotchas.md
2. references/extension-lifecycle.md
</required_reading>
<process>
## Step 1: Identify the Symptom
| Symptom | Likely Cause |
|---------|--------------|
| Extension not loading | File not in discovery path, syntax error, missing export default |
| Tool not appearing for LLM | Tool not registered, `pi.setActiveTools()` excluding it, tool name conflict |
| Command not responding | Command not registered, name collision with built-in |
| Event not firing | Wrong event name, handler returning too early, handler error (logged but swallowed) |
| UI not rendering | `ctx.hasUI` is false (print mode), render lines exceed width, component not returning lines |
| State lost on restart | State not stored in `details` or `appendEntry`, not reconstructing on `session_start` |
| Google API errors | Using `Type.Union`/`Type.Literal` instead of `StringEnum` |
| Context overflow | Tool output not truncated |
| Deadlock/hang | Session control methods called from event handler (must be in command handler only) |
| Render garbage | Theme imported directly instead of from callback, missing `truncateToWidth()` |
## Step 2: Check Extension Loading
```bash
# Test in isolation
sf -e ./path/to/extension.ts
# Check SF startup output for errors
# Extension errors are logged but don't crash SF
```
## Step 3: Verify File Location
Community extensions must be in auto-discovery paths:
- `~/.sf/agent/extensions/*.ts`
- `~/.sf/agent/extensions/*/index.ts`
- `.sf/extensions/*.ts`
- `.sf/extensions/*/index.ts`
Note: `~/.sf/agent/extensions/` is reserved for bundled extensions synced from the sf package.
The file must `export default function(pi: ExtensionAPI) { ... }`.
## Step 4: Check for Common Mistakes
Read `../references/key-rules-gotchas.md` and verify each rule against the extension code.
## Step 5: Add Debugging
```typescript
// Temporary: log to stderr (visible in SF output)
console.error("[my-ext] Loading...");
pi.on("session_start", async (_event, ctx) => {
console.error("[my-ext] Session started");
ctx.ui.notify("Extension loaded", "info");
});
```
## Step 6: Fix and Reload
Apply the fix and test:
```
/reload
```
</process>
<success_criteria>
Debugging is complete when:
- [ ] Root cause identified
- [ ] Fix applied
- [ ] Extension loads and functions correctly after `/reload`
- [ ] No regression in existing functionality
</success_criteria>

View file

@ -0,0 +1,153 @@
---
name: forensics
description: Post-mortem a failed sf auto-mode run. Traces from symptom to root cause using `.sf/activity/*.jsonl`, `.sf/journal/YYYY-MM-DD.jsonl`, `.sf/metrics.json`, and `.sf/auto.lock`. Produces a filing-ready bug report with file:line references and a concrete fix suggestion. Use when asked to "forensics", "post-mortem", "why did auto-mode fail", "trace the stuck loop", "debug the crash", after `/sf forensics` is invoked, or when a session ended in an unexpected terminal state. Reads existing artifacts — does NOT re-run anything.
---
<objective>
Turn scattered sf runtime artifacts into one coherent cause chain. The deliverable is a GitHub-issue-ready report that names the file and line where the bug lives, cites the evidence, and proposes a fix. Forensics is archaeology, not re-run — no modifying state, no triggering commands, just reading the paper trail.
</objective>
<context>
sf persists a lot of runtime evidence under `.sf/`:
- `activity/{seq}-{unitType}-{unitId}.jsonl` — full tool-call and message stream per unit
- `journal/YYYY-MM-DD.jsonl` — iteration-level events (dispatch-match, stuck-detected, guard-block, unit-start/end, terminal)
- `metrics.json` — token/cost ledger; duplicate `type/id` entries indicate a stuck loop
- `auto.lock` — JSON snapshot of the currently-owning PID; stale lock = crash mid-unit
- `forensics/` — saved prior reports
- `debug/` — debug logs if enabled
- `runtime/paused-session.json` — serialized session when auto-mode paused
- `doctor-history.jsonl` — doctor check history
The `/sf forensics` command pre-computes a forensic report with anomalies flagged. This skill is the manual investigation that goes deeper, or runs when the automated report isn't enough.
Invocation points:
- `/sf forensics` has been run and user wants deeper analysis
- Auto-mode exited unexpectedly, no obvious cause
- Same unit dispatched multiple times (stuck loop suspected)
- A session crashed and `auto.lock` is stale
- User reports "it just stopped" or "it did the wrong thing"
</context>
<core_principle>
**READ-ONLY.** Forensics touches no live state. Non-mutating inspection commands (e.g., `ps`, `top -b`, `cat /proc/*`) are allowed for checking process status or reading system files. Strictly prohibited: `sf_*` writes, commands that modify state, executing binaries that produce side effects, writing to files (outside the final report), or re-running the failed unit. The evidence must stay pristine for future investigations.
**SYMPTOM → ROOT CAUSE, WITH CITATIONS.** Every claim in the report is backed by an artifact path and either a line number or a JSONL field. "The loop got stuck because of a race" is not useful; "`.sf/journal/2026-04-19.jsonl:142` shows `stuck-detected` with flowId X, caused by `dispatch-guard.ts:87` returning the same unit after `unit-end`" is.
**PRE-PARSED LEADS, NOT CONCLUSIONS.** If `/sf forensics` has surfaced anomalies, treat them as hypotheses to verify, not answers.
</core_principle>
<process>
## Step 1: Locate the evidence
Read what's in `.sf/`:
1. `auto.lock` — is it stale? Check PID against `ps` (read-only inspection, allowed). Stale = crash.
2. Most recent `.sf/activity/*.jsonl` — sort by mtime, newest first. That's the last unit that ran.
3. Today's `.sf/journal/YYYY-MM-DD.jsonl` — the iteration-level view.
4. `.sf/metrics.json` — does any `type/id` appear more than once? (stuck loop signal)
5. `.sf/runtime/paused-session.json` — if present, what was the pause reason?
## Step 2: Reconstruct the failure from the activity log
Activity JSONL format:
- Each line is `{type: "message", message: {...}}`.
- `message.role: "assistant"``content[]` with `type: "text"` reasoning and `type: "toolCall"` invocations.
- `message.role: "toolResult"``{toolCallId, toolName, isError, content}`.
- `usage` on assistant messages tracks tokens and cost.
To trace a failure:
1. Search for `isError: true` tool results in the last activity log. That's usually the proximate symptom.
2. Walk backwards to the assistant message that made the call. Read the `text` content — that's the agent's reasoning at the moment of failure.
3. Keep walking back. Find where the agent's model of the state diverged from reality.
## Step 3: Cross-reference the journal
For each symptom from the activity log, find the matching journal events:
- `stuck-detected` + same `flowId` → the loop detected repetition. `data.reason` says why.
- `guard-block` → a dispatch guard refused to run a unit. Check `data.reason` and trace to `dispatch-guard.ts` logic.
- `unit-end` followed by another `unit-start` for the same `unitId` → re-dispatch. If tied to `stuck-detected`, the artifact verification failed after the unit succeeded.
- `terminal` → auto-mode decided to stop. `data.reason` tells you why.
Use `flowId` to reconstruct one iteration; use `causedBy` to follow causal chains across iterations.
## Step 4: Name the root cause
A good root cause is:
- Specific: a function, a state transition, a missing guard.
- Falsifiable: if we changed X, would the failure go away?
- Sourced: cites a file and (where applicable) a line number.
Bad root cause: "Auto-mode got stuck in a loop." Good root cause: "After slice completion, `auto-unit-closeout.ts` emits `unit-end` before `auto-post-unit.ts` updates the roadmap checkbox. The next `iteration-start` finds the same unit `[ ]` and re-dispatches — `dispatch-guard.ts:42` has no check against the freshly-ended `unitId`."
Consult the source map in `src/resources/extensions/sf/prompts/forensics.md` to map symptoms to the likely domain files.
## Step 5: Propose a fix
For the root cause:
- Which file and function holds the bug?
- What minimal change would eliminate it?
- What test would have caught it? Can one be added?
- Is this a regression from a recent commit? (Run `git log -- path/to/file.ts` mentally; do NOT run git commands that could modify state.)
## Step 6: Write the report
Format the output as a GitHub-issue-ready report:
```markdown
## Symptom
<what the user saw — quote the error or describe the observed behavior>
## Evidence Trail
1. `.sf/auto.lock`<state: stale / fresh>
2. `.sf/activity/042-slice-S02.jsonl:128`<isError: true from `sf_task_complete`>
3. `.sf/journal/2026-04-19.jsonl:87`<stuck-detected flowId 7a3c>
4. `.sf/metrics.json`<unit type/id "slice/S02" appears 3 times>
## Root Cause
<specific named cause — file, function, state transition>
`src/resources/extensions/sf/auto-unit-closeout.ts:<line>`: <exactly what goes wrong>
## Proposed Fix
<minimal change — file, function, what to change>
## Test
<what test would have caught this; whether one should be added>
## Confidence
<high / medium / low> — <what would change this confidence>
```
Offer to file this as a GitHub issue via `mcp__github__issue_write` — explicit confirmation required per the outward-action rule. Also save a copy to `.sf/forensics/<slug>.md` for future reference.
</process>
<anti_patterns>
- **Running any `sf_*` write tool during forensics.** Evidence stays pristine.
- **Re-running the auto-mode loop to "reproduce."** That overwrites the activity log. Read the existing one.
- **Vague root cause.** "There's a race" is not a root cause. Name the race.
- **No citations.** Every claim gets an artifact path.
- **Skipping the journal.** The journal is the only view that shows dispatch-level decisions.
- **Auto-filing the GitHub issue.** Outward actions need confirmation.
</anti_patterns>
<success_criteria>
- [ ] The symptom is quoted, not paraphrased.
- [ ] Every claim in the evidence trail cites a file and a line or field.
- [ ] The root cause names a specific file, function, or state transition.
- [ ] The proposed fix is minimal and falsifiable.
- [ ] Confidence is stated honestly.
- [ ] Report is saved under `.sf/forensics/` even if not filed as an issue.
</success_criteria>

View file

@ -0,0 +1,184 @@
# Build From Spec
End-to-end workflow: take a product idea or specification, produce working software.
## Prerequisites
- `sf` CLI installed (`npm install -g singularity-forge`)
- A directory for the project (can be empty)
- Git initialized in the directory
## Process
### Step 1: Prepare the project directory
```bash
PROJECT_DIR="/tmp/my-project-name"
mkdir -p "$PROJECT_DIR"
cd "$PROJECT_DIR"
git init 2>/dev/null # SF needs a git repo
```
### Step 2: Write the spec file
Write a spec file that describes what to build. More detail = better results.
```bash
cat > spec.md << 'SPEC'
# Product Name
## What
[Concrete description of what to build]
## Requirements
- [Specific, testable requirement 1]
- [Specific, testable requirement 2]
- [Specific, testable requirement 3]
## Technical Constraints
- [Language, framework, or platform requirements]
- [External services or APIs involved]
- [Performance or security requirements]
## Out of Scope
- [Things explicitly NOT included]
SPEC
```
**Spec quality matters.** Vague specs produce vague results. Include:
- What the user can DO when it's done (not what code to write)
- Technical constraints (language, framework, Node version)
- What's out of scope (prevents scope creep)
### Step 3: Launch the build
**Fire-and-forget (simplest — SF does everything):**
```bash
cd "$PROJECT_DIR"
RESULT=$(sf headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null)
EXIT=$?
```
`--timeout 0` disables the timeout for long builds. `--auto` chains milestone creation into execution.
**With budget limit:**
```bash
# Use step-by-step mode with budget checks instead of auto
# See workflows/step-by-step.md
```
**For CI or ecosystem runs (no user config):**
```bash
RESULT=$(sf headless --bare --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null)
EXIT=$?
```
### Step 4: Handle the result
```bash
case $EXIT in
0)
# Success — verify deliverables
STATUS=$(echo "$RESULT" | jq -r '.status')
COST=$(echo "$RESULT" | jq -r '.cost.total')
COMMITS=$(echo "$RESULT" | jq -r '.commits | length')
echo "Build complete: $STATUS, cost: \$$COST, commits: $COMMITS"
# Inspect what was built
sf headless query | jq '.state.progress'
# Check the actual files
ls -la "$PROJECT_DIR"
;;
1)
# Error — inspect and decide
echo "Build failed"
echo "$RESULT" | jq '{status: .status, phase: .phase}'
# Check state for details
sf headless query | jq '.state'
;;
10)
# Blocked — needs intervention
echo "Build blocked — needs human input"
sf headless query | jq '{phase: .state.phase, blockers: .state.blockers}'
# Options: steer, supply answers, or escalate
# See workflows/monitor-and-poll.md for blocker handling
;;
11)
echo "Build was cancelled"
;;
esac
```
### Step 5: Verify deliverables
After a successful build, verify the output:
```bash
cd "$PROJECT_DIR"
# Check project state
sf headless query | jq '{
phase: .state.phase,
progress: .state.progress,
cost: .cost.total
}'
# Check git log for what was built
git log --oneline
# Run the project's own tests if they exist
[ -f package.json ] && npm test 2>/dev/null
[ -f Makefile ] && make test 2>/dev/null
```
## Complete Example
```bash
# 1. Setup
mkdir -p /tmp/todo-api && cd /tmp/todo-api && git init
# 2. Write spec
cat > spec.md << 'SPEC'
# Todo API
Build a REST API for managing todo items using Node.js and Express.
## Requirements
- GET /todos — list all todos
- POST /todos — create a todo (title, completed)
- PUT /todos/:id — update a todo
- DELETE /todos/:id — delete a todo
- Todos stored in-memory (no database)
- Input validation with descriptive error messages
- Health check endpoint at GET /health
## Technical Constraints
- Node.js with ESM modules
- Express framework
- No external database — in-memory array
- Port configurable via PORT env var (default 3000)
## Out of Scope
- Authentication
- Persistent storage
- Frontend
SPEC
# 3. Launch
RESULT=$(sf headless --output-format json --timeout 0 --context spec.md new-milestone --auto 2>/dev/null)
EXIT=$?
# 4. Report
if [ $EXIT -eq 0 ]; then
COST=$(echo "$RESULT" | jq -r '.cost.total')
echo "Build complete (\$$COST)"
echo "Files created:"
find . -not -path './.sf/*' -not -path './.git/*' -type f
else
echo "Build failed (exit $EXIT)"
echo "$RESULT" | jq .
fi
```

View file

@ -0,0 +1,187 @@
# Monitor and Poll
Check status of a SF project, handle blockers, track costs, and decide next actions.
## Checking Project State
The `query` command is your primary monitoring tool. It's instant (~50ms), costs nothing (no LLM), and returns the full project snapshot.
```bash
cd /path/to/project
sf headless query
```
### Key fields to inspect
```bash
# Overall status
sf headless query | jq '{
phase: .state.phase,
milestone: .state.activeMilestone.id,
slice: .state.activeSlice.id,
task: .state.activeTask.id,
progress: .state.progress,
cost: .cost.total
}'
# What should happen next
sf headless query | jq '.next'
# Returns: { "action": "dispatch", "unitType": "execute-task", "unitId": "M001/S01/T01" }
# Is it done?
sf headless query | jq '.state.phase'
# "complete" = done, "blocked" = needs you, anything else = in progress
```
### Phase meanings
| Phase | Meaning | Your action |
|-------|---------|-------------|
| `pre-planning` | Milestone exists, no slices planned yet | Run `auto` or `next` |
| `needs-discussion` | Ambiguities need resolution | Supply answers or run with defaults |
| `discussing` | Discussion in progress | Wait |
| `researching` | Codebase/library research | Wait |
| `planning` | Creating task plans | Wait |
| `executing` | Writing code | Wait |
| `verifying` | Checking must-haves | Wait |
| `summarizing` | Recording what happened | Wait |
| `advancing` | Moving to next task/slice | Wait |
| `evaluating-gates` | Quality checks before execution | Wait or run `next` |
| `validating-milestone` | Final milestone checks | Wait |
| `completing-milestone` | Archiving and cleanup | Wait |
| `complete` | Done | Verify deliverables |
| `blocked` | Needs human input | Handle blocker (see below) |
| `paused` | Explicitly paused | Resume with `auto` |
## Handling Blockers
When exit code is `10` or phase is `blocked`:
```bash
# 1. Understand the blocker
sf headless query | jq '{phase: .state.phase, blockers: .state.blockers, nextAction: .state.nextAction}'
# 2. Option A: Steer around it
sf headless steer "Skip the database dependency, use in-memory storage instead"
# 3. Option B: Supply pre-built answers
cat > fix.json << 'EOF'
{
"questions": { "blocked_question_id": "workaround_option" },
"defaults": { "strategy": "first_option" }
}
EOF
sf headless --answers fix.json auto
# 4. Option C: Force a specific phase
sf headless dispatch replan
# 5. Option D: Escalate to user
echo "SF build blocked. Phase: $(sf headless query | jq -r '.state.phase')"
echo "Manual intervention required."
```
## Cost Tracking
```bash
# Current cumulative cost
sf headless query | jq '.cost.total'
# Per-worker breakdown
sf headless query | jq '.cost.workers'
# After a step (from HeadlessJsonResult)
RESULT=$(sf headless --output-format json next 2>/dev/null)
echo "$RESULT" | jq '.cost'
```
### Budget enforcement pattern
```bash
MAX_BUDGET=15.00
check_budget() {
TOTAL=$(sf headless query | jq -r '.cost.total')
OVER=$(echo "$TOTAL > $MAX_BUDGET" | bc -l)
if [ "$OVER" = "1" ]; then
echo "Budget exceeded: \$$TOTAL > \$$MAX_BUDGET"
sf headless stop
return 1
fi
return 0
}
```
## Poll-and-React Loop
For agents that need to periodically check on a build:
```bash
cd /path/to/project
poll_project() {
STATE=$(sf headless query 2>/dev/null)
if [ -z "$STATE" ]; then
echo "NO_PROJECT"
return
fi
PHASE=$(echo "$STATE" | jq -r '.state.phase')
COST=$(echo "$STATE" | jq -r '.cost.total')
PROGRESS=$(echo "$STATE" | jq -r '"\(.state.progress.milestones.done)/\(.state.progress.milestones.total) milestones, \(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"')
case "$PHASE" in
complete)
echo "COMPLETE cost=\$$COST progress=$PROGRESS"
;;
blocked)
BLOCKER=$(echo "$STATE" | jq -r '.state.nextAction // "unknown"')
echo "BLOCKED reason=$BLOCKER cost=\$$COST"
;;
*)
NEXT=$(echo "$STATE" | jq -r '.next.action // "none"')
echo "IN_PROGRESS phase=$PHASE next=$NEXT cost=\$$COST progress=$PROGRESS"
;;
esac
}
```
## Resuming Work
If a build was interrupted or you need to continue:
```bash
cd /path/to/project
# Check current state
sf headless query | jq '.state.phase'
# Resume from where it left off
sf headless --output-format json auto 2>/dev/null
# Or resume a specific session
sf headless --resume "$SESSION_ID" --output-format json auto 2>/dev/null
```
## Reading Build Artifacts
After completion, inspect what SF produced:
```bash
cd /path/to/project
# Project summary
cat .sf/PROJECT.md
# What was decided
cat .sf/DECISIONS.md
# Requirements and their validation status
cat .sf/REQUIREMENTS.md
# Milestone summary
cat .sf/milestones/M001-*/M001-*-SUMMARY.md 2>/dev/null
# Git history (SF commits per-slice)
git log --oneline
```

View file

@ -0,0 +1,156 @@
# Step-by-Step Execution
Run SF one unit at a time with decision points between steps. Use this when you need
control over execution — budget enforcement, progress reporting, conditional logic,
or the ability to steer mid-build.
## When to use this vs `auto`
| Approach | Use when |
|----------|----------|
| `auto` | You trust the build, just want the result |
| `next` loop | You need budget checks, progress updates, or intervention points |
## Core Loop
```bash
cd /path/to/project
MAX_BUDGET=20.00
TOTAL_COST=0
while true; do
# Run one unit
RESULT=$(sf headless --output-format json next 2>/dev/null)
EXIT=$?
# Parse result
STATUS=$(echo "$RESULT" | jq -r '.status')
STEP_COST=$(echo "$RESULT" | jq -r '.cost.total')
PHASE=$(echo "$RESULT" | jq -r '.phase // empty')
SESSION_ID=$(echo "$RESULT" | jq -r '.sessionId // empty')
# Handle exit codes
case $EXIT in
0) ;; # success — continue
1)
echo "Step failed: $STATUS"
break
;;
10)
echo "Blocked — needs intervention"
sf headless query | jq '.state'
break
;;
11)
echo "Cancelled"
break
;;
esac
# Check if milestone complete
CURRENT_PHASE=$(sf headless query | jq -r '.state.phase')
if [ "$CURRENT_PHASE" = "complete" ]; then
TOTAL_COST=$(sf headless query | jq -r '.cost.total')
echo "Milestone complete. Total cost: \$$TOTAL_COST"
break
fi
# Budget check
TOTAL_COST=$(sf headless query | jq -r '.cost.total')
OVER=$(echo "$TOTAL_COST > $MAX_BUDGET" | bc -l)
if [ "$OVER" = "1" ]; then
echo "Budget limit (\$$MAX_BUDGET) exceeded at \$$TOTAL_COST"
sf headless stop
break
fi
# Progress report
PROGRESS=$(sf headless query | jq -r '"\(.state.progress.tasks.done)/\(.state.progress.tasks.total) tasks"')
echo "Step done ($STATUS). Phase: $CURRENT_PHASE, Progress: $PROGRESS, Cost: \$$TOTAL_COST"
done
```
## Step-by-Step with Spec Creation
Complete flow from idea to working code with full control:
```bash
# 1. Setup
PROJECT_DIR="/tmp/my-project"
mkdir -p "$PROJECT_DIR" && cd "$PROJECT_DIR" && git init 2>/dev/null
# 2. Write spec
cat > spec.md << 'SPEC'
[Your spec here]
SPEC
# 3. Create the milestone (planning only, no execution)
RESULT=$(sf headless --output-format json --context spec.md new-milestone 2>/dev/null)
EXIT=$?
if [ $EXIT -ne 0 ]; then
echo "Milestone creation failed"
echo "$RESULT" | jq .
exit 1
fi
echo "Milestone created. Starting execution..."
# 4. Execute step-by-step
STEP=0
while true; do
STEP=$((STEP + 1))
RESULT=$(sf headless --output-format json next 2>/dev/null)
EXIT=$?
[ $EXIT -ne 0 ] && break
PHASE=$(sf headless query | jq -r '.state.phase')
COST=$(sf headless query | jq -r '.cost.total')
echo "Step $STEP complete. Phase: $PHASE, Cost: \$$COST"
[ "$PHASE" = "complete" ] && break
done
echo "Build finished in $STEP steps"
```
## Intervention Patterns
### Steer mid-execution
If you detect the build going in the wrong direction:
```bash
# Check what's happening
sf headless query | jq '{phase: .state.phase, task: .state.activeTask}'
# Redirect
sf headless steer "Use SQLite instead of PostgreSQL for storage"
# Continue
sf headless --output-format json next 2>/dev/null
```
### Skip a stuck unit
```bash
sf headless skip
sf headless --output-format json next 2>/dev/null
```
### Undo last completed unit
```bash
sf headless undo --force
sf headless --output-format json next 2>/dev/null
```
### Force a specific phase
```bash
sf headless dispatch replan # Re-plan the current slice
sf headless dispatch execute # Skip to execution
sf headless dispatch uat # Jump to user acceptance testing
```

View file

@ -0,0 +1,67 @@
import assert from "node:assert/strict";
import test from "node:test";

// Top-level await import keeps the .ts source loadable under the node test
// runner (type stripping) without a separate build step for this spec file.
// The redundant `filterInitialSfHeader: filterInitialSfHeader` rename in the
// original destructuring served no purpose and is dropped.
const { filterInitialSfHeader } = await import(
  "../../web/lib/initial-sf-header-filter.ts"
);
// ASCII-art banner lines used as the synthetic startup header in the tests
// below — presumably matching the banner the SF CLI prints at startup;
// TODO confirm against the expected header in initial-sf-header-filter.ts.
// `as const` keeps this a readonly tuple of literal strings.
const SF_LOGO_LINES = [
  " ██████╗ ███████╗██████╗ ",
  " ██╔════╝ ██╔════╝██╔══██╗",
  " ██║ ███╗███████╗██║ ██║",
  " ██║ ██║╚════██║██║ ██║",
  " ╚██████╔╝███████║██████╔╝",
  " ╚═════╝ ╚══════╝╚═════╝ ",
] as const;
test("filterInitialSfHeader strips a plain startup banner and keeps real terminal content", () => {
  // A complete banner (logo + version + blank line) followed by real output:
  // the filter should report a match and surface only the real output.
  const realContent = "Warning: Google Search is not configured.";
  const chunk = [
    ...SF_LOGO_LINES,
    " Singularity Forge v2.33.1",
    "",
    realContent,
  ].join("\n");

  const outcome = filterInitialSfHeader(chunk);

  assert.equal(outcome.status, "matched");
  assert.equal(outcome.text, realContent);
});
test("filterInitialSfHeader strips ANSI-colored startup banner output", () => {
  // Same banner, but wrapped in SGR escape sequences and CRLF line endings,
  // the way a real color-capable terminal stream would deliver it.
  const cyan = "\u001b[36m";
  const reset = "\u001b[39m";
  const bold = "\u001b[1m";
  const boldReset = "\u001b[22m";
  const dim = "\u001b[2m";
  const dimReset = "\u001b[22m";
  const warning = "Warning: terminal content starts here.\r\n";

  let raw = "";
  for (const line of SF_LOGO_LINES) {
    raw += `${cyan}${line}${reset}\r\n`;
  }
  raw += ` ${bold}Singularity Forge${boldReset} ${dim}v2.33.1${dimReset}\r\n\r\n`;
  raw += warning;

  const result = filterInitialSfHeader(raw);

  assert.equal(result.status, "matched");
  assert.equal(result.text, warning);
});
test("filterInitialSfHeader waits for more data when the startup banner is incomplete", () => {
  // Only the first three logo lines have arrived — the filter should ask
  // for more data rather than matching or passing anything through.
  const firstChunk = SF_LOGO_LINES.slice(0, 3).join("\n");

  assert.deepEqual(filterInitialSfHeader(firstChunk), {
    status: "needs-more",
    text: "",
  });
});
test("filterInitialSfHeader passes normal terminal output through untouched", () => {
  // Output with no banner at all must flow through byte-for-byte.
  const ordinaryOutput = "Warning: already in the shell\r\n$ ";

  const outcome = filterInitialSfHeader(ordinaryOutput);

  assert.equal(outcome.status, "passthrough");
  assert.equal(outcome.text, ordinaryOutput);
});

80
web/proxy.ts Normal file
View file

@ -0,0 +1,80 @@
import { NextResponse, type NextRequest } from "next/server"
/**
* Next.js proxy validates bearer token and origin on all API routes.
*
* The SF_WEB_AUTH_TOKEN env var is set at server launch. Every /api/* request
* must carry a matching `Authorization: Bearer <token>` header. EventSource
* (SSE) connections may use the `_token` query parameter instead since the
* EventSource API cannot set custom headers.
*
* Additionally, if an `Origin` header is present, it must match the expected
* localhost origin to prevent cross-site request forgery.
*/
/**
 * Gate all /api/* requests behind a bearer-token check plus an Origin
 * allow-list. Non-API routes pass through untouched, and the whole gate is
 * disabled when SF_WEB_AUTH_TOKEN was never configured.
 *
 * @param request incoming request from the Next.js proxy/middleware hook
 * @returns a 401/403 JSON response on rejection, otherwise NextResponse.next()
 */
export function proxy(request: NextRequest): NextResponse | undefined {
  const { pathname } = request.nextUrl

  // Only gate API routes
  if (!pathname.startsWith("/api/")) return NextResponse.next()

  const expectedToken = process.env.SF_WEB_AUTH_TOKEN
  if (!expectedToken) {
    // If no token was configured (e.g. dev mode without launch harness),
    // allow everything — the server didn't opt into auth.
    return NextResponse.next()
  }

  // ── Origin / CORS check ────────────────────────────────────────────
  // Only enforced when the browser sends an Origin header; header-less
  // clients (curl, same-origin GETs) fall through to the token check.
  const origin = request.headers.get("origin")
  if (origin) {
    const host = process.env.SF_WEB_HOST || "127.0.0.1"
    const port = process.env.SF_WEB_PORT || "3000"
    // Default: localhost origin for the launched host:port
    const allowed = new Set([`http://${host}:${port}`])

    // Browsers treat `localhost`, `127.0.0.1`, and `[::1]` as DISTINCT
    // origins. If the server was launched on one loopback spelling but the
    // user browses via another, every API call would 403 — so when the host
    // is a loopback address, accept all loopback spellings on the same port.
    const loopbackHosts = ["127.0.0.1", "localhost", "[::1]"]
    if (loopbackHosts.includes(host)) {
      for (const alias of loopbackHosts) {
        allowed.add(`http://${alias}:${port}`)
      }
    }

    // SF_WEB_ALLOWED_ORIGINS lets users whitelist additional origins for
    // secure tunnel setups (Tailscale Serve, Cloudflare Tunnel, ngrok, etc.)
    const extra = process.env.SF_WEB_ALLOWED_ORIGINS
    if (extra) {
      for (const entry of extra.split(",")) {
        const trimmed = entry.trim()
        if (trimmed) allowed.add(trimmed)
      }
    }

    if (!allowed.has(origin)) {
      return NextResponse.json(
        { error: "Forbidden: origin mismatch" },
        { status: 403 },
      )
    }
  }

  // ── Bearer token check ─────────────────────────────────────────────
  let token: string | null = null

  // 1. Authorization header (preferred)
  const authHeader = request.headers.get("authorization")
  if (authHeader?.startsWith("Bearer ")) {
    token = authHeader.slice(7)
  }

  // 2. Query parameter fallback for EventSource / SSE — the EventSource API
  //    cannot set custom request headers, so SSE clients pass `?_token=`.
  if (!token) {
    token = request.nextUrl.searchParams.get("_token")
  }

  if (!token || token !== expectedToken) {
    return NextResponse.json(
      { error: "Unauthorized" },
      { status: 401 },
    )
  }

  return NextResponse.next()
}
// Next.js matcher: run this proxy only for API routes, so pages, static
// assets, and framework internals never pay for the auth checks.
export const config = {
  matcher: "/api/:path*",
}