Rename all four packages/pi-* directories to forge-native names, stripping the 'pi' identity and establishing forge's own:

- packages/pi-coding-agent → packages/coding-agent
- packages/pi-ai → packages/ai
- packages/pi-agent-core → packages/agent-core
- packages/pi-tui → packages/tui

Package names updated:

- @singularity-forge/pi-coding-agent → @singularity-forge/coding-agent
- @singularity-forge/pi-ai → @singularity-forge/ai
- @singularity-forge/pi-agent-core → @singularity-forge/agent-core
- @singularity-forge/pi-tui → @singularity-forge/tui

All import references, bare string references, path references, internal variable names (_bundledPi*), and dist files updated. @mariozechner/pi-* third-party compat aliases preserved.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
266 lines
6.8 KiB
JavaScript
266 lines
6.8 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import { spawnSync } from "node:child_process";
|
|
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
import { homedir } from "node:os";
|
|
import { dirname, resolve } from "node:path";
|
|
import { performance } from "node:perf_hooks";
|
|
|
|
// ---------------------------------------------------------------------------
// CLI configuration
//
// Recognised flags (see parseArgs): --models / --model, --output,
// --maxModels / --max-models, --maxTokens / --max-tokens.
// ---------------------------------------------------------------------------

// Parent directory of the directory containing this script.
const repoRoot = resolve(import.meta.dirname, "..");

// Default report location: <repoRoot>/.sf/model-benchmarks/<timestamp>.json,
// where the timestamp is the current ISO time with ":" and "." replaced by "-".
const defaultOutputPath = resolve(
	repoRoot,
	".sf",
	"model-benchmarks",
	`${new Date().toISOString().replace(/[:.]/g, "-")}.json`,
);

const args = parseArgs(process.argv.slice(2));
const modelsArg = args.models ?? args.model;
const outputPath = resolve(args.output ?? defaultOutputPath);

// May be NaN when the flag value is not numeric; the model selection step
// checks Number.isFinite and then uses the full model list.
const maxModels = Number.parseInt(
	args.maxModels ?? args["max-models"] ?? "8",
	10,
);

// Token budget applied to each task. A flag with no value is parsed as the
// string "true", and arbitrary text parses to NaN; NaN would otherwise flow
// into Math.min() and into the request options unchecked. Fall back to the
// default instead of sending an invalid limit.
const DEFAULT_MAX_TOKENS = 420;
const requestedMaxTokens = Number.parseInt(
	args.maxTokens ?? args["max-tokens"] ?? String(DEFAULT_MAX_TOKENS),
	10,
);
const maxTokens =
	Number.isFinite(requestedMaxTokens) && requestedMaxTokens > 0
		? requestedMaxTokens
		: DEFAULT_MAX_TOKENS;
if (maxTokens !== requestedMaxTokens) {
	console.warn(
		`ignoring invalid max tokens value; using ${DEFAULT_MAX_TOKENS}`,
	);
}
|
|
|
|
// Refresh the sf-scoped environment variables first; this runs before the
// dynamic import of the AI package below.
await loadSfScopedEnv();

const aiPackage = await import("../packages/ai/src/index.ts");
const { getModel, streamSimpleOpenAICompletions } = aiPackage;

// Models benchmarked when no --models / --model flag is supplied.
// Each entry has the form "<provider>/<model id>".
const DEFAULT_MODEL_IDS = [
	"kimi-coding/kimi-k2.6",
	"minimax/MiniMax-M2.7-highspeed",
	"zai/glm-4.5",
	"mistral/devstral-latest",
	"alibaba-coding-plan/qwen3-coder-plus",
	"xiaomi/mimo-v2-pro",
	"opencode-go/minimax-m2.7",
	"openrouter/inclusionai/ling-2.6-1t:free",
];

/**
 * Split a comma-separated list into trimmed, non-empty entries.
 *
 * @param {string} list - Raw flag value, e.g. "a/b, c/d".
 * @returns {string[]} The individual entries in their original order.
 */
function parseModelList(list) {
	const entries = [];
	for (const piece of list.split(",")) {
		const trimmed = piece.trim();
		if (trimmed) entries.push(trimmed);
	}
	return entries;
}

const modelIds = modelsArg ? parseModelList(modelsArg) : DEFAULT_MODEL_IDS;
|
|
|
|
/**
 * Check for the "json-repair" task: the reply must parse as JSON and expose a
 * string `bug`, a string `fix` and an array `tests`.
 *
 * @param {string} text - Full text returned by the model.
 * @returns {boolean} True when the reply has the expected shape.
 */
function isRepairedPayload(text) {
	try {
		// Destructuring stays inside the try so a `null` payload fails the
		// check instead of throwing.
		const { bug, fix, tests } = JSON.parse(text);
		if (typeof bug !== "string") return false;
		if (typeof fix !== "string") return false;
		return Array.isArray(tests);
	} catch {
		return false;
	}
}

// Every pattern in a list must match for the corresponding task to pass.
const pathDebugPatterns = [
	/startsWith|prefix/i,
	/path\.sep|relative|normalize|resolve/i,
];
const routingPlanPatterns = [
	/kimi-k2\.5/,
	/kimi-k2\.6/,
	/test|verify|validation/i,
];

/**
 * Build a check that passes only when all given patterns match the text.
 *
 * @param {RegExp[]} patterns - Patterns without the global or sticky flag.
 * @returns {(text: string) => boolean} The resulting check function.
 */
function matchesAll(patterns) {
	return (text) => patterns.every((pattern) => pattern.test(text));
}

// Benchmark tasks. Each task has an id, a per-task token budget, the user
// prompt sent to the model, and a `check` applied to the collected reply text.
const tasks = [
	{
		id: "json-repair",
		// Capped at 280 tokens regardless of the configured budget.
		maxTokens: Math.min(maxTokens, 280),
		prompt: `Return ONLY valid JSON matching { "bug": string, "fix": string, "tests": string[] }.
Broken payload: {"bug":"path traversal\\n- accepts ../foo","fix":123,"tests":"none"}.
Normalize it semantically; no markdown.`,
		check: isRepairedPayload,
	},
	{
		id: "path-debug",
		maxTokens,
		prompt: `Find the bug and propose the minimal patch. Code:
function isSafe(base, target) {
const resolved = path.resolve(base, target)
return resolved.startsWith(base)
}
Explain why it is unsafe in <= 8 bullets, then provide a corrected JS function.`,
		check: matchesAll(pathDebugPatterns),
	},
	{
		id: "routing-plan",
		maxTokens,
		prompt: `Produce a concise implementation plan with risks and verification for migrating an LLM routing table from alias k2p5 to semantic ids kimi-k2.5 and kimi-k2.6.`,
		check: matchesAll(routingPlanPatterns),
	},
];
|
|
|
|
// Limit the run to the first `maxModels` entries; a non-finite limit selects
// the whole list.
const modelLimit = Number.isFinite(maxModels) ? maxModels : modelIds.length;
const selectedModels = modelIds.slice(0, modelLimit);

// One record per (model, task) pair, plus one record per model that could not
// be resolved at all.
const results = [];

const BENCHMARK_SYSTEM_PROMPT =
	"You are a precise software engineering benchmark model. Follow requested output formats exactly.";

/**
 * Run a single task against a single model and build its result record.
 * Logs a PASS/FAIL line when the stream completes; a thrown error yields a
 * failure record without logging.
 *
 * @param {object} model - Model entry returned by getModel.
 * @param {string} fullId - The "<provider>/<model id>" string used in the report.
 * @param {{ id: string, maxTokens: number, prompt: string, check: (text: string) => boolean }} task
 * @returns {Promise<object>} The record to append to `results`.
 */
async function runBenchmarkTask(model, fullId, task) {
	const startedAt = performance.now();
	const elapsedSinceStart = () => Math.round(performance.now() - startedAt);

	let collected = "";
	let outcome;
	try {
		const context = {
			systemPrompt: BENCHMARK_SYSTEM_PROMPT,
			messages: [
				{ role: "user", content: task.prompt, timestamp: Date.now() },
			],
		};
		const options = { temperature: 0, maxTokens: task.maxTokens };
		const stream = streamSimpleOpenAICompletions(model, context, options);
		// Only text deltas contribute to the text that is checked.
		for await (const event of stream) {
			if (event.type === "text_delta") collected += event.delta;
		}
		outcome = await stream.result();
	} catch (error) {
		return {
			model: fullId,
			task: task.id,
			ok: false,
			elapsedMs: elapsedSinceStart(),
			error: error instanceof Error ? error.message : String(error),
		};
	}

	const elapsedMs = elapsedSinceStart();
	const { stopReason } = outcome;
	// A stream that ended with stop reason "error" fails without running the check.
	const passed = stopReason !== "error" && task.check(collected);
	console.log(
		`${passed ? "PASS" : "FAIL"} ${fullId} ${task.id} ${elapsedMs}ms ${stopReason}`,
	);
	return {
		model: fullId,
		task: task.id,
		ok: passed,
		stopReason,
		errorMessage: outcome.errorMessage,
		elapsedMs,
		chars: collected.length,
		usage: outcome.usage,
		// Keep only the first 700 characters of the reply in the report.
		sample: collected.slice(0, 700),
	};
}

for (const fullId of selectedModels) {
	// Split on the first "/" only; the model id itself may contain slashes.
	const separatorAt = fullId.indexOf("/");
	if (separatorAt < 0) {
		results.push({
			model: fullId,
			ok: false,
			error: "expected provider/model id",
		});
		continue;
	}

	const model = getModel(
		fullId.slice(0, separatorAt),
		fullId.slice(separatorAt + 1),
	);
	if (!model) {
		results.push({
			model: fullId,
			ok: false,
			error: "model not found in registry",
		});
		continue;
	}

	// Tasks run one after another for each model.
	for (const task of tasks) {
		results.push(await runBenchmarkTask(model, fullId, task));
	}
}
|
|
|
|
// Assemble the report: creation time, the models and task ids that were
// selected, and every collected result record.
const taskIds = tasks.map(({ id }) => id);
const report = {
	createdAt: new Date().toISOString(),
	models: selectedModels,
	tasks: taskIds,
	results,
};

// Create the output directory (and any missing parents) before writing the
// pretty-printed JSON followed by a trailing newline.
const outputDir = dirname(outputPath);
mkdirSync(outputDir, { recursive: true });
const serializedReport = JSON.stringify(report, null, 2);
writeFileSync(outputPath, `${serializedReport}\n`);
console.log(`wrote ${outputPath}`);
|
|
|
|
/**
 * Parse `--flag` style command-line arguments into a plain object.
 *
 * Supported forms:
 *   --key value   -> { key: "value" }
 *   --key=value   -> { key: "value" } (the value may itself contain "=")
 *   --key         -> { key: "true" }  (no value, or the next token is a flag)
 *
 * Tokens that do not start with "--" and are not consumed as a value are
 * ignored. An empty value is treated like a missing one and stored as "true".
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {Record<string, string>} Flag names (without "--") mapped to string values.
 */
function parseArgs(argv) {
	const parsed = {};
	for (let i = 0; i < argv.length; i++) {
		const arg = argv[i];
		if (!arg.startsWith("--")) continue;
		const key = arg.slice(2);

		// Inline form: split on the first "=" so "--max-tokens=128" is not
		// recorded as a flag literally named "max-tokens=128".
		const equalsAt = key.indexOf("=");
		if (equalsAt > 0) {
			parsed[key.slice(0, equalsAt)] = key.slice(equalsAt + 1) || "true";
			continue;
		}

		const next = argv[i + 1];
		if (!next || next.startsWith("--")) {
			parsed[key] = "true";
		} else {
			parsed[key] = next;
			// The value token has been consumed; skip it.
			i++;
		}
	}
	return parsed;
}
|
|
|
|
/**
 * Reload sf-scoped environment variables from the SOPS-encrypted secrets file.
 *
 * Steps:
 *  1. Remove from process.env every variable name listed in the `sf` wrapper
 *     script (see readSfScopedEnvNames).
 *  2. Decrypt ~/.dotfiles/secrets/api-keys.yaml with `sops`.
 *  3. Extract KEY=VALUE lines from the `.sf`, `.sf.env` and
 *     `.sf.providers.*.env` sections with `yq`.
 *  4. Copy each pair with a valid variable name and a non-empty value into
 *     process.env.
 *
 * Best effort: if either tool exits non-zero or prints nothing, the function
 * returns without setting anything (the names from step 1 stay removed).
 * stderr of both tools is discarded.
 *
 * @returns {Promise<void>}
 */
async function loadSfScopedEnv() {
	const home = homedir();
	const secretsFile = `${home}/.dotfiles/secrets/api-keys.yaml`;
	const sopsConfig = `${home}/.dotfiles/.sops.yaml`;
	const wrapperPath = `${home}/.local/bin/sf`;

	// Drop previously exported values first.
	readSfScopedEnvNames(wrapperPath).forEach((name) => {
		delete process.env[name];
	});

	const sops = spawnSync("sops", ["--config", sopsConfig, "-d", secretsFile], {
		encoding: "utf8",
		stdio: ["ignore", "pipe", "ignore"],
	});
	if (sops.status !== 0 || !sops.stdout) return;

	// Emits one "KEY=VALUE" line per entry: scalar entries directly under
	// `.sf`, entries under `.sf.env`, and entries under each provider's `env`.
	const extractExpression = `(
(.sf // {} | to_entries[]
| select((.value | type) == "string" or (.value | type) == "number" or (.value | type) == "boolean")
| select(.value != null and .value != "")
| "\\(.key)=\\(.value)"),
(.sf.env // {} | to_entries[]
| select(.value != null and .value != "")
| "\\(.key)=\\(.value)"),
(.sf.providers // {} | to_entries[]
| (.value.env // {})
| to_entries[]
| select(.value != null and .value != "")
| "\\(.key)=\\(.value)")
)`;
	const yq = spawnSync("yq", ["-r", extractExpression], {
		// The decrypted YAML is fed to yq on stdin.
		input: sops.stdout,
		encoding: "utf8",
		stdio: ["pipe", "pipe", "ignore"],
	});
	if (yq.status !== 0 || !yq.stdout) return;

	const envNamePattern = /^[A-Za-z_][A-Za-z0-9_]*$/;
	for (const entry of yq.stdout.split(/\r?\n/)) {
		// Split on the first "=" only; the value may contain further "=".
		const separatorAt = entry.indexOf("=");
		if (separatorAt < 1) continue;
		const name = entry.slice(0, separatorAt);
		const value = entry.slice(separatorAt + 1);
		if (!value || !envNamePattern.test(name)) continue;
		process.env[name] = value;
	}
}
|
|
|
|
/**
 * Read the variable names listed in the `sf_scoped_env=( ... )` array of a
 * wrapper script.
 *
 * The array must open with `sf_scoped_env=(` at the end of a line and close
 * with `)` at the start of a later line. Each line in between is trimmed and
 * kept only if it consists solely of upper-case letters, digits and
 * underscores. Both LF and CRLF line endings are accepted.
 *
 * @param {string} wrapperPath - Path of the script to inspect.
 * @returns {string[]} The listed names, or an empty array when the file cannot
 *   be read or contains no such array.
 */
function readSfScopedEnvNames(wrapperPath) {
	try {
		const source = readFileSync(wrapperPath, "utf8");
		// `\r?\n` keeps the delimiters consistent with the line split below,
		// so a CRLF file is not silently treated as having no array.
		const match = source.match(/sf_scoped_env=\(\r?\n([\s\S]*?)\r?\n\)/);
		if (!match) return [];
		return match[1]
			.split(/\r?\n/)
			.map((line) => line.trim())
			.filter((line) => /^[A-Z0-9_]+$/.test(line));
	} catch {
		// Best effort: a missing or unreadable wrapper means nothing to clear.
		return [];
	}
}
|