// singularity-forge/src/resources/extensions/sf/metrics-central.js

/**
 * Centralized Metrics Collector — Unified metrics sink for all SF subsystems.
 *
 * Purpose: Replace scattered metrics emission (DB, Prometheus, stderr, JSONL)
 * with a single collector that aggregates counters, gauges, and histograms,
 * then exposes them in Prometheus text format AND persists to SQLite for
 * queryable historical analysis.
 *
 * Consumer: /uok status, health widgets, external Prometheus scrapers,
 * TUI cost/context overlay, and programmatic queries via sf-db.
 *
 * Design:
 * - In-memory aggregation with configurable flush interval
 * - Prometheus text format output (compatible with existing exposition)
 * - SQLite persistence for historical queries (session-scoped)
 * - Cost/token metrics alongside operational metrics
 * - Retry with exponential backoff on flush failures
 * - Zero external dependencies
 */

import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { DatabaseSync } from "node:sqlite";
import { sfRoot } from "./paths.js";
import { logWarning } from "./workflow-logger.js";

// Default period between scheduled flushes when opts.flushIntervalMs is not supplied.
const FLUSH_INTERVAL_MS = 60_000; // 1 minute
// Histogram keeps only this many bucket bounds (the smallest ones after sorting).
const MAX_HISTOGRAM_BUCKETS = 10;
// A failed flush schedules a retry only while the consecutive-failure count is below this.
const FLUSH_RETRY_MAX = 3;
// Delay before the first retry; doubled for each further consecutive failure.
const FLUSH_RETRY_BASE_MS = 1000;
// Names accepted by validateMetricName (Prometheus naming convention).
const METRIC_NAME_PATTERN = /^[a-zA-Z_:][a-zA-Z0-9_:]*$/;
const METRICS_DB_ROW_CAP = 10_000; // keep newest N rows; prune on flush when exceeded

// ─── Metrics System Performance Monitoring ──────────────────────────────────

// Bookkeeping about the collector itself; reset by initMetricsCentral when no
// flush timer is running, and reported by getMetricsSystemStats.
let _metricsSystemStartTime = Date.now(); // start of the current uptime window
let _flushCount = 0; // flushMetrics runs that got past the base-path check (retries included)
let _flushSuccessCount = 0; // flushes that completed without throwing
let _flushFailureCount = 0; // flushes that threw
let _lastFlushDuration = 0; // ms taken by the most recent successful flush
let _lastFlushTimestamp = 0; // epoch ms of the most recent successful flush
let _totalFlushDuration = 0; // sum of successful flush durations, used for the average

/**
 * Snapshot of the collector's own health: uptime, flush counters and
 * timings, and whether the metrics database handle is currently open.
 *
 * @returns {object} plain object; `successRate` is a percentage string with
 *   one decimal ("0%" before any flush), durations are in milliseconds.
 */
export function getMetricsSystemStats() {
	const uptimeMs = Date.now() - _metricsSystemStartTime;

	let successRate = "0%";
	if (_flushCount > 0) {
		const percent = (_flushSuccessCount / _flushCount) * 100;
		successRate = `${percent.toFixed(1)}%`;
	}

	// Only successful flushes contribute to _totalFlushDuration, so average over those.
	let averageFlushDuration = 0;
	if (_flushSuccessCount > 0) {
		averageFlushDuration = Math.round(_totalFlushDuration / _flushSuccessCount);
	}

	return {
		uptimeMs,
		uptimeSeconds: Math.floor(uptimeMs / 1000),
		flushCount: _flushCount,
		flushSuccessCount: _flushSuccessCount,
		flushFailureCount: _flushFailureCount,
		successRate,
		lastFlushDuration: _lastFlushDuration,
		lastFlushTimestamp: _lastFlushTimestamp,
		averageFlushDuration,
		databaseStatus: _metricsDb ? "connected" : "disconnected",
	};
}

/**
 * Build the dashboard summary: collector health plus cost, token, latency,
 * error and resource figures read from the in-memory registry.
 *
 * Counters are summed across all label sets, histograms are reported as a
 * rounded mean, and gauges as a single value (see the extract* helpers).
 */
export function getSystemPerformanceDashboard() {
	const {
		uptimeSeconds,
		databaseStatus,
		successRate,
		flushCount,
		averageFlushDuration,
	} = getMetricsSystemStats();
	const registry = getRegistry();

	// Small adapters so the metric names below read as a table.
	const counterTotal = (metric) => extractMetricValue(registry, metric);
	const histogramMean = (metric) =>
		extractMetricHistogramMean(registry, metric);
	const gaugeValue = (metric) => extractMetricGaugeValue(registry, metric);

	const metricsSystemHealth = {
		status: databaseStatus,
		successRate,
		flushCount,
		averageFlushDuration: `${averageFlushDuration}ms`,
	};
	const cost = counterTotal("sf_cost_total");
	const tokens = {
		input: counterTotal("sf_tokens_input_total"),
		output: counterTotal("sf_tokens_output_total"),
	};
	const performance = {
		averageToolExecution: histogramMean("sf_tool_execution_duration_ms"),
		averageModelRequest: histogramMean("sf_model_request_duration_ms"),
		averageDatabaseQuery: histogramMean("sf_database_query_duration_ms"),
	};
	const errors = {
		tool: counterTotal("sf_tool_errors_total"),
		model: counterTotal("sf_model_errors_total"),
		database: counterTotal("sf_database_errors_total"),
		system: counterTotal("sf_system_warnings_total"),
	};
	const resources = {
		activeSessions: gaugeValue("sf_active_sessions_count"),
		activeAgents: gaugeValue("sf_active_agents_count"),
		concurrentToolCalls: gaugeValue("sf_concurrent_tool_calls"),
	};

	return {
		uptime: uptimeSeconds,
		metricsSystemHealth,
		cost,
		tokens,
		performance,
		errors,
		resources,
	};
}

/**
 * Sum a counter across every label set it holds.
 *
 * @returns {number} the total, or 0 when the counter is not registered.
 */
function extractMetricValue(registry, metricName) {
	const counter = registry.counters.get(metricName);
	if (!counter) return 0;
	return [...counter.values.values()].reduce(
		(runningTotal, seriesValue) => runningTotal + seriesValue,
		0,
	);
}

/**
 * Mean observation of a histogram (sum / count), rounded to an integer.
 *
 * @returns {number} 0 when the histogram is unknown or has no observations.
 */
function extractMetricHistogramMean(registry, metricName) {
	const histogram = registry.histograms.get(metricName);
	if (histogram && histogram.count !== 0) {
		const { sum, count } = histogram;
		return Math.round(sum / count);
	}
	return 0;
}

/**
 * Read a single value from a gauge.
 *
 * When the gauge holds several label sets, the value of the label set that
 * was inserted last (Map insertion order) is returned; updating an existing
 * label set does not change that order.
 *
 * @returns {number} 0 when the gauge is unknown or has no values.
 */
function extractMetricGaugeValue(registry, metricName) {
	const gauge = registry.gauges.get(metricName);
	if (!gauge) return 0;

	const { values } = gauge;
	if (values.size === 0) return 0;

	const snapshot = [...values.values()];
	return snapshot.at(-1) ?? 0;
}

// ─── Metric Types ───────────────────────────────────────────────────────────

/**
 * Counter metric: one running total per label set.
 *
 * Totals are stored in `values`, keyed by the string that _buildLabelKey
 * produces for the label object.
 */
class Counter {
	/**
	 * @param {string} name — metric name used in the exposition output
	 * @param {string} help — text emitted on the `# HELP` line
	 * @param {string[]} [labelNames] — stored as given; not consulted by this class
	 */
	constructor(name, help, labelNames = []) {
		this.name = name;
		this.help = help;
		this.labelNames = labelNames;
		this.values = new Map(); // key → number
	}

	/**
	 * Add `amount` to the series identified by `labels`.
	 *
	 * A non-finite amount (NaN, ±Infinity, or a non-number) is treated as 0,
	 * mirroring Gauge.set; otherwise a single bad sample would turn the total
	 * into NaN for the rest of the process lifetime.
	 *
	 * @param {object} [labels] — label key/value pairs
	 * @param {number} [amount] — increment, default 1
	 */
	inc(labels = {}, amount = 1) {
		const key = this._key(labels);
		const delta = Number.isFinite(amount) ? amount : 0;
		this.values.set(key, (this.values.get(key) ?? 0) + delta);
	}

	/** Current total for `labels`, or 0 when that series was never incremented. */
	get(labels = {}) {
		return this.values.get(this._key(labels)) ?? 0;
	}

	/** Map key for a label object. */
	_key(labels) {
		return _buildLabelKey(labels);
	}

	/** Yield the HELP/TYPE header followed by one sample line per series. */
	*lines() {
		yield `# HELP ${this.name} ${this.help}`;
		yield `# TYPE ${this.name} counter`;
		for (const [key, value] of this.values) {
			const labels = _parseLabelKey(key);
			yield fmtLine(this.name, value, labels);
		}
	}
}

/**
 * Gauge metric: the last value set for each label set.
 *
 * Values live in `values`, keyed by the string that _buildLabelKey produces
 * for the label object.
 */
class Gauge {
	constructor(name, help, labelNames = []) {
		Object.assign(this, { name, help, labelNames, values: new Map() });
	}

	/** Store `value` for `labels`; anything that is not a finite number is stored as 0. */
	set(labels = {}, value) {
		this.values.set(this._key(labels), Number.isFinite(value) ? value : 0);
	}

	/** Stored value for `labels`, or 0 when that series was never set. */
	get(labels = {}) {
		const stored = this.values.get(this._key(labels));
		return stored ?? 0;
	}

	/** Map key for a label object. */
	_key(labels) {
		return _buildLabelKey(labels);
	}

	/** Yield the HELP/TYPE header followed by one sample line per series. */
	*lines() {
		const { name, help } = this;
		yield `# HELP ${name} ${help}`;
		yield `# TYPE ${name} gauge`;
		for (const [seriesKey, current] of this.values.entries()) {
			yield fmtLine(name, current, _parseLabelKey(seriesKey));
		}
	}
}

/**
 * Histogram metric with cumulative buckets, a running sum and an
 * observation count. It carries no labels.
 */
class Histogram {
	/**
	 * @param {string} name — metric name used in the exposition output
	 * @param {string} help — text emitted on the `# HELP` line
	 * @param {number[]} [buckets] — upper bounds; sorted ascending, then
	 *   truncated to the first MAX_HISTOGRAM_BUCKETS entries
	 */
	constructor(
		name,
		help,
		buckets = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
	) {
		this.name = name;
		this.help = help;
		// Copy before sorting so the caller's array is left untouched.
		const capped = [...buckets]
			.sort((a, b) => a - b)
			.slice(0, MAX_HISTOGRAM_BUCKETS);
		this.buckets = capped;
		this.counts = new Map(); // bucket → count
		this.sum = 0;
		this.count = 0;
	}

	/**
	 * Record one observation.
	 *
	 * Non-finite values (NaN, ±Infinity, non-numbers) are ignored: they cannot
	 * be bucketed meaningfully and would turn `sum` — and every mean derived
	 * from it — into NaN/Infinity for the rest of the process lifetime.
	 *
	 * @param {number} value — observed value
	 */
	observe(value) {
		if (!Number.isFinite(value)) return;
		this.sum += value;
		this.count++;
		// Buckets are cumulative: bump every bound that is >= the value.
		for (const bucket of this.buckets) {
			if (value <= bucket) {
				this.counts.set(bucket, (this.counts.get(bucket) ?? 0) + 1);
			}
		}
	}

	/** Yield HELP/TYPE, one `_bucket` line per bound plus `+Inf`, then `_sum` and `_count`. */
	*lines() {
		yield `# HELP ${this.name} ${this.help}`;
		yield `# TYPE ${this.name} histogram`;
		for (const bucket of this.buckets) {
			yield fmtLine(`${this.name}_bucket`, this.counts.get(bucket) ?? 0, {
				le: String(bucket),
			});
		}
		yield fmtLine(`${this.name}_bucket`, this.count, { le: "+Inf" });
		yield fmtLine(`${this.name}_sum`, this.sum);
		yield fmtLine(`${this.name}_count`, this.count);
	}
}

// ─── Label Escaping ─────────────────────────────────────────────────────────

/**
 * Escape a label value for use inside a label key string: backslash, '='
 * and ',' each get a leading backslash.
 */
function _escapeLabel(v) {
	const text = String(v);
	// Backslashes go first so the ones added for '=' and ',' are not doubled.
	const backslashesDoubled = text.replaceAll("\\", "\\\\");
	const equalsEscaped = backslashesDoubled.replaceAll("=", "\\=");
	return equalsEscaped.replaceAll(",", "\\,");
}

/**
 * Reverse the three substitutions made by _escapeLabel, applied in the
 * opposite order (',' then '=' then backslash).
 */
function _unescapeLabel(v) {
	const commasRestored = v.replaceAll("\\,", ",");
	const equalsRestored = commasRestored.replaceAll("\\=", "=");
	return equalsRestored.replaceAll("\\\\", "\\");
}

// ─── Label Key Builder (escapes values, stable ordering) ────────────────────

/**
 * Serialise a label object to `k=v,k=v`, with keys in sorted order and
 * values escaped by _escapeLabel. Null/undefined values become "".
 */
function _buildLabelKey(labels) {
	const pairs = [];
	for (const labelName of Object.keys(labels).sort()) {
		const escaped = _escapeLabel(labels[labelName] ?? "");
		pairs.push(`${labelName}=${escaped}`);
	}
	return pairs.join(",");
}

/**
 * Parse a `k=v,k=v` label key string back into an object.
 *
 * Inside a value, a backslash followed by backslash, '=' or ',' stands for
 * that character; an unescaped ',' ends the value. Parsing stops at the
 * first position where no '=' can be found.
 */
function _parseLabelKey(key) {
	const ESCAPABLE = "\\=,";
	const parsed = {};
	let cursor = 0;
	while (cursor < key.length) {
		const separator = key.indexOf("=", cursor);
		if (separator < 0) break;
		const labelName = key.slice(cursor, separator);

		const valueChars = [];
		let pos = separator + 1;
		for (; pos < key.length; pos++) {
			const ch = key[pos];
			const hasNext = pos + 1 < key.length;
			if (ch === "\\" && hasNext && ESCAPABLE.includes(key[pos + 1])) {
				valueChars.push(key[pos + 1]);
				pos++; // together with the loop increment this skips both characters
				continue;
			}
			if (ch === ",") break;
			valueChars.push(ch);
		}

		parsed[labelName] = valueChars.join("");
		cursor = pos + 1; // step over the ',' that ended the value
	}
	return parsed;
}

// ─── Formatter ──────────────────────────────────────────────────────────────

/**
 * Format one sample line in Prometheus text exposition format:
 * `name{label="value",...} value`.
 *
 * Label values are escaped as the format requires (backslash → `\\`,
 * double quote → `\"`, line feed → `\n`), so values such as Windows paths
 * or messages containing quotes cannot break the line. Infinite sample
 * values are written as `+Inf` / `-Inf`.
 *
 * @param {string} name — metric (or `_bucket`/`_sum`/`_count`) name
 * @param {number} value — sample value
 * @param {object} [labels] — label key/value pairs; omitted braces when empty
 * @returns {string} the formatted line, without a trailing newline
 */
function fmtLine(name, value, labels = {}) {
	// Backslash must be handled first so the escapes added afterwards survive.
	const escapeLabelValue = (raw) =>
		String(raw)
			.replace(/\\/g, "\\\\")
			.replace(/"/g, '\\"')
			.replace(/\n/g, "\\n");

	const labelStr = Object.entries(labels)
		.map(([k, v]) => `${k}="${escapeLabelValue(v)}"`)
		.join(",");
	const suffix = labelStr ? `{${labelStr}}` : "";

	let rendered = value;
	if (value === Number.POSITIVE_INFINITY) rendered = "+Inf";
	else if (value === Number.NEGATIVE_INFINITY) rendered = "-Inf";

	return `${name}${suffix} ${rendered}`;
}

// ─── Validation ─────────────────────────────────────────────────────────────

/**
 * Reject metric names that are not usable in Prometheus output.
 *
 * @param {string} name — candidate metric name
 * @throws {TypeError} when `name` is not a non-empty string
 * @throws {Error} when `name` does not match METRIC_NAME_PATTERN
 */
function validateMetricName(name) {
	const isNonEmptyString = typeof name === "string" && name !== "";
	if (!isNonEmptyString) {
		throw new TypeError(
			`Metric name must be a non-empty string, got: ${typeof name}`,
		);
	}
	if (METRIC_NAME_PATTERN.test(name)) return;
	throw new Error(
		`Invalid metric name "${name}". Must match Prometheus naming convention: ^[a-zA-Z_:][a-zA-Z0-9_:]*$`,
	);
}

// ─── Central Registry ───────────────────────────────────────────────────────

/**
 * Holds every Counter, Gauge and Histogram by name and renders them all as
 * one Prometheus text document.
 *
 * Each accessor is get-or-create: the first call for a name constructs the
 * metric with the given arguments; later calls return that same instance and
 * ignore their `help`/`labelNames`/`buckets` arguments.
 */
class MetricsRegistry {
	counters = new Map();
	gauges = new Map();
	histograms = new Map();
	_metadata = new Map();

	counter(name, help, labelNames) {
		if (this.counters.has(name)) return this.counters.get(name);
		const created = new Counter(name, help, labelNames);
		this.counters.set(name, created);
		return created;
	}

	gauge(name, help, labelNames) {
		if (this.gauges.has(name)) return this.gauges.get(name);
		const created = new Gauge(name, help, labelNames);
		this.gauges.set(name, created);
		return created;
	}

	histogram(name, help, buckets) {
		if (this.histograms.has(name)) return this.histograms.get(name);
		const created = new Histogram(name, help, buckets);
		this.histograms.set(name, created);
		return created;
	}

	/** Render counters, then gauges, then histograms; always ends with a newline. */
	buildText() {
		const families = [
			...this.counters.values(),
			...this.gauges.values(),
			...this.histograms.values(),
		];
		const rendered = families.flatMap((metric) => [...metric.lines()]);
		return `${rendered.join("\n")}\n`;
	}

	/** Drop every registered counter, gauge and histogram. */
	clear() {
		for (const store of [this.counters, this.gauges, this.histograms]) {
			store.clear();
		}
	}
}

// ─── Singleton ──────────────────────────────────────────────────────────────

// Module-level state shared by every function below; there is one collector per process.
let _registry = null; // created on first getRegistry() call
let _flushTimer = null; // interval handle that runs flushMetrics
let _metricsHealthTimer = null; // interval handle that runs updateMetricsSystemHealth
let _basePath = ""; // project root from initMetricsCentral; flushMetrics is a no-op while empty
let _sessionId = ""; // added as a session_id label and stored with each persisted row
let _dbAdapter = null; // kept for API compat but no longer used for metrics writes
let _metricsDb = null; // dedicated metrics.db connection
let _flushFailures = 0; // consecutive failed flushes; reset to 0 by a successful flush

/** Return the process-wide registry, creating it on first use. */
function getRegistry() {
	_registry ??= new MetricsRegistry();
	return _registry;
}

/** Location of the Prometheus text file: `<sfRoot>/runtime/sf-metrics.prom`. */
function metricsFilePath(basePath) {
	const root = sfRoot(basePath);
	return join(root, "runtime", "sf-metrics.prom");
}

// ─── DB Persistence ─────────────────────────────────────────────────────────

/** Location of the dedicated metrics database: `<sfRoot>/metrics.db`. */
function metricsDbPath(basePath) {
	const root = sfRoot(basePath);
	return join(root, "metrics.db");
}

/**
 * Open (and, if needed, create) the dedicated metrics database under
 * sfRoot(basePath) and store the handle in `_metricsDb`.
 *
 * Does nothing when a handle is already open. Any failure is logged as a
 * warning and leaves `_metricsDb` unset, so persistence is simply skipped.
 *
 * @param {string} basePath — project root
 */
function openMetricsDb(basePath) {
	if (_metricsDb) return;
	let db = null;
	try {
		mkdirSync(sfRoot(basePath), { recursive: true });
		db = new DatabaseSync(metricsDbPath(basePath));
		db.exec("PRAGMA journal_mode=WAL");
		db.exec("PRAGMA synchronous=NORMAL");
		db.exec(`
			CREATE TABLE IF NOT EXISTS metrics (
				id INTEGER PRIMARY KEY AUTOINCREMENT,
				name TEXT NOT NULL,
				type TEXT NOT NULL CHECK(type IN ('counter', 'gauge', 'histogram')),
				labels TEXT,
				value REAL NOT NULL,
				timestamp TEXT NOT NULL DEFAULT (datetime('now')),
				session_id TEXT
			)
		`);
		db.exec(`CREATE INDEX IF NOT EXISTS idx_metrics_name ON metrics(name)`);
		db.exec(
			`CREATE INDEX IF NOT EXISTS idx_metrics_session ON metrics(session_id)`,
		);
		db.exec(
			`CREATE INDEX IF NOT EXISTS idx_metrics_name_ts ON metrics(name, timestamp DESC)`,
		);
		// Publish the handle only once the schema is fully in place.
		_metricsDb = db;
	} catch (err) {
		// The connection may already be open when a PRAGMA/DDL statement fails;
		// close it so the file handle is not leaked.
		try {
			db?.close();
		} catch {
			// already unusable — nothing more to release
		}
		logWarning("metrics-central", `Failed to open metrics.db: ${err.message}`);
	}
}

/**
 * Close the metrics database handle, if any, and clear `_metricsDb`.
 * Errors raised by close() are ignored.
 */
function closeMetricsDb() {
	const handle = _metricsDb;
	if (!handle) return;
	try {
		handle.close();
	} catch {
		// swallow
	} finally {
		_metricsDb = null;
	}
}

/**
 * Intentionally does nothing: the metrics table is created by openMetricsDb.
 * The parameter is accepted and ignored.
 */
function _ensureMetricsTable(db) {
	// no-op — metrics.db is set up by openMetricsDb
}

/**
 * Write a snapshot of every metric in `registry` to the metrics database,
 * then prune the table back to METRICS_DB_ROW_CAP rows if it has grown
 * beyond that.
 *
 * One row is written per counter/gauge label set (labels stored as JSON)
 * and one row per histogram (its `sum` as the value, `{count, sum}` as the
 * labels JSON). All rows of a snapshot share one timestamp.
 *
 * The inserts run inside a single transaction so a snapshot is stored
 * either completely or not at all, and so the batch costs one commit
 * instead of one per row.
 *
 * Never throws: failures are logged (or, when the connection turns out to
 * be closed, the stale handle is dropped) and the flush carries on.
 *
 * @param {MetricsRegistry} registry — source of the metrics
 * @param {string} sessionId — stored in the session_id column
 * @param {*} _ignored — unused; kept for call compatibility
 */
function persistMetricsToDb(registry, sessionId, _ignored) {
	const db = _metricsDb;
	if (!db) return;
	const ts = new Date().toISOString();
	// The value column is REAL NOT NULL, so non-finite numbers are stored as 0.
	const safeNum = (n) => (Number.isFinite(n) ? n : 0);
	let inTransaction = false;
	try {
		const insert = db.prepare(
			"INSERT INTO metrics (name, type, labels, value, timestamp, session_id) VALUES (?, ?, ?, ?, ?, ?)",
		);
		db.exec("BEGIN");
		inTransaction = true;
		for (const c of registry.counters.values()) {
			for (const [key, value] of c.values) {
				const labels = _parseLabelKey(key);
				insert.run(
					c.name,
					"counter",
					JSON.stringify(labels),
					safeNum(value),
					ts,
					sessionId,
				);
			}
		}
		for (const g of registry.gauges.values()) {
			for (const [key, value] of g.values) {
				const labels = _parseLabelKey(key);
				insert.run(
					g.name,
					"gauge",
					JSON.stringify(labels),
					safeNum(value),
					ts,
					sessionId,
				);
			}
		}
		for (const h of registry.histograms.values()) {
			insert.run(
				h.name,
				"histogram",
				JSON.stringify({ count: h.count, sum: h.sum }),
				safeNum(h.sum),
				ts,
				sessionId,
			);
		}
		db.exec("COMMIT");
		inTransaction = false;
	} catch (err) {
		if (inTransaction) {
			// Discard the partial snapshot; the connection may itself be the
			// problem, in which case the rollback fails too and is ignored.
			try {
				db.exec("ROLLBACK");
			} catch {
				// nothing left to undo
			}
		}
		if (err.message?.includes("database is not open")) {
			closeMetricsDb();
			return;
		}
		logWarning("metrics-central", `DB persist failed: ${err.message}`);
	}
	// Prune if the table has grown beyond the cap (best-effort; never block flush)
	try {
		const row = _metricsDb?.prepare("SELECT count(*) as n FROM metrics").get();
		if (row && row.n > METRICS_DB_ROW_CAP) {
			_metricsDb
				.prepare(
					`DELETE FROM metrics WHERE rowid NOT IN (
					SELECT rowid FROM metrics ORDER BY timestamp DESC LIMIT ${METRICS_DB_ROW_CAP}
				)`,
				)
				.run();
		}
	} catch (_) {
		// swallow — prune failure must never surface to the user
	}
}

// ─── Flush with Retry ───────────────────────────────────────────────────────

/**
 * Write the current registry to the Prometheus text file and to the
 * metrics database, updating the collector's own flush statistics.
 *
 * Does nothing while no base path is set. On failure the error is logged
 * and, while the number of consecutive failures is below FLUSH_RETRY_MAX,
 * a retry is scheduled after FLUSH_RETRY_BASE_MS doubled per failure.
 * Once retries are exhausted, `sf_metrics_flush_failed_total` is
 * incremented instead.
 */
function flushMetrics() {
	if (!_basePath) return;

	const flushStartTime = Date.now();
	_flushCount++;

	try {
		const text = getRegistry().buildText();
		const path = metricsFilePath(_basePath);
		mkdirSync(join(sfRoot(_basePath), "runtime"), { recursive: true });
		writeFileSync(path, text, "utf-8");
		// Persist to dedicated metrics.db
		persistMetricsToDb(getRegistry(), _sessionId, null);

		// Update performance metrics
		_flushSuccessCount++;
		_lastFlushDuration = Date.now() - flushStartTime;
		_lastFlushTimestamp = Date.now();
		_totalFlushDuration += _lastFlushDuration;
		_flushFailures = 0;

		// Record flush performance metrics (these appear in the next flush's output)
		try {
			getRegistry()
				.counter(
					"sf_metrics_flush_success_total",
					"Total successful metrics flushes",
					[],
				)
				.inc({}, 1);
			getRegistry()
				.gauge(
					"sf_metrics_flush_duration_ms",
					"Duration of last metrics flush in milliseconds",
					[],
				)
				.set({}, _lastFlushDuration);
		} catch {
			// Best effort - don't let metrics recording break the flush
		}
	} catch (err) {
		_flushFailureCount++;
		_flushFailures++;
		logWarning(
			"metrics-central",
			`Flush failed (attempt ${_flushFailures}): ${err.message}`,
		);
		if (_flushFailures < FLUSH_RETRY_MAX) {
			const delay = FLUSH_RETRY_BASE_MS * 2 ** (_flushFailures - 1);
			const retryTimer = setTimeout(flushMetrics, delay);
			// Like the interval timers, a pending retry must not keep the process alive.
			if (retryTimer.unref) retryTimer.unref();
		} else {
			// Record flush failure as a metric
			try {
				getRegistry()
					.counter(
						"sf_metrics_flush_failed_total",
						"Total metrics flush failures",
						[],
					)
					.inc({}, 1);
			} catch {
				// Best effort
			}
		}
	}
}

// ─── Public API ─────────────────────────────────────────────────────────────

/**
 * Initialize the centralized metrics system.
 *
 * Starts (or restarts) the periodic flush timer, opens the dedicated
 * metrics database for `basePath`, and starts the periodic health
 * reporting timer if it is not already running. Flush statistics are
 * reset only when no flush timer was running before the call.
 *
 * Calling this again with a different `basePath` closes the database that
 * was opened for the previous project so metrics are persisted under the
 * new project's root rather than the old one.
 *
 * @param {string} basePath — project root
 * @param {object} [opts] — { flushIntervalMs, sessionId, dbAdapter }
 */
export function initMetricsCentral(basePath, opts = {}) {
	// openMetricsDb keeps an existing handle, so a handle that belongs to a
	// different project has to be released before switching base paths.
	if (_metricsDb && _basePath && _basePath !== basePath) {
		closeMetricsDb();
	}

	_basePath = basePath;
	_sessionId = opts.sessionId ?? "";
	_dbAdapter = opts.dbAdapter ?? null; // accepted but no longer used for metrics writes
	const interval = opts.flushIntervalMs ?? FLUSH_INTERVAL_MS;
	const healthReportIntervalMs = 300_000; // every 5 minutes

	// Reset metrics system stats on fresh init
	if (!_flushTimer) {
		_metricsSystemStartTime = Date.now();
		_flushCount = 0;
		_flushSuccessCount = 0;
		_flushFailureCount = 0;
		_lastFlushDuration = 0;
		_lastFlushTimestamp = 0;
		_totalFlushDuration = 0;
	}

	if (_flushTimer) clearInterval(_flushTimer);
	_flushTimer = setInterval(flushMetrics, interval);

	// Ensure timer doesn't keep process alive
	if (_flushTimer.unref) _flushTimer.unref();

	// Open dedicated metrics.db (separate from main sf.db to avoid WAL pressure)
	openMetricsDb(basePath);

	// Start periodic metrics system health reporting
	if (!_metricsHealthTimer) {
		_metricsHealthTimer = setInterval(() => {
			try {
				updateMetricsSystemHealth();
			} catch {
				// Non-fatal
			}
		}, healthReportIntervalMs);
		if (_metricsHealthTimer.unref) _metricsHealthTimer.unref();
	}
}

/**
 * Refresh the gauges that describe the collector itself: uptime in
 * seconds, database connection flag, and the number of registered metrics.
 *
 * Failures are logged as warnings and never propagate.
 */
function updateMetricsSystemHealth() {
	const registry = getRegistry();
	try {
		// Uptime since the last fresh init
		const uptimeSeconds = Math.floor(
			(Date.now() - _metricsSystemStartTime) / 1000,
		);
		const uptimeGauge = registry.gauge(
			"sf_metrics_system_uptime_seconds",
			"Metrics system uptime in seconds",
			[],
		);
		uptimeGauge.set({}, uptimeSeconds);

		// Database connection flag, labelled with the project path
		const connectedFlag = _metricsDb ? 1 : 0;
		const dbStatusGauge = registry.gauge(
			"sf_metrics_database_status",
			"Database connection status (1=connected, 0=disconnected)",
			["project_path"],
		);
		dbStatusGauge.set({ project_path: _basePath || "unknown" }, connectedFlag);

		// Number of metrics currently registered; counted after the two gauges
		// above exist and before the gauge that stores the count is looked up.
		const totalMetrics =
			registry.counters.size + registry.gauges.size + registry.histograms.size;
		const activeCountGauge = registry.gauge(
			"sf_metrics_active_count",
			"Number of active metrics in memory",
			[],
		);
		activeCountGauge.set({}, totalMetrics);
	} catch (err) {
		logWarning(
			"metrics-central",
			`Failed to update health metrics: ${err.message}`,
		);
	}
}

/**
 * Stop the metrics collector: cancel both timers, flush one last time,
 * then clear the session state and close the metrics database.
 */
export function stopMetricsCentral() {
	if (_flushTimer) clearInterval(_flushTimer);
	_flushTimer = null;

	if (_metricsHealthTimer) clearInterval(_metricsHealthTimer);
	_metricsHealthTimer = null;

	// The final flush has to run while the base path and database are still set.
	flushMetrics();

	_basePath = "";
	_sessionId = "";
	_dbAdapter = null;
	closeMetricsDb();
}

/**
 * Increment a counter.
 *
 * When a session id is configured and the caller did not supply a
 * `session_id` label, the session id is added to a copy of the labels.
 *
 * @param {string} name — metric name (sf_ prefix recommended)
 * @param {object} [labels] — label key-value pairs
 * @param {number} [amount] — increment amount (default 1)
 * @throws when `name` fails validateMetricName
 */
export function recordCounter(name, labels = {}, amount = 1) {
	validateMetricName(name);
	const { help } = getMetricMeta(name);
	const needsSessionLabel = _sessionId && !labels.session_id;
	const effectiveLabels = needsSessionLabel
		? { ...labels, session_id: _sessionId }
		: labels;
	const counter = getRegistry().counter(
		name,
		help,
		Object.keys(effectiveLabels),
	);
	counter.inc(effectiveLabels, amount);
}

/**
 * Set a gauge.
 *
 * When a session id is configured and the caller did not supply a
 * `session_id` label, the session id is added to a copy of the labels.
 *
 * @param {string} name — metric name
 * @param {number} value — gauge value
 * @param {object} [labels] — label key-value pairs
 * @throws when `name` fails validateMetricName
 */
export function recordGauge(name, value, labels = {}) {
	validateMetricName(name);
	const { help } = getMetricMeta(name);
	const needsSessionLabel = _sessionId && !labels.session_id;
	const effectiveLabels = needsSessionLabel
		? { ...labels, session_id: _sessionId }
		: labels;
	const gauge = getRegistry().gauge(name, help, Object.keys(effectiveLabels));
	gauge.set(effectiveLabels, value);
}

/**
 * Add one observation to a histogram. The help text and bucket bounds come
 * from the metric's registered metadata.
 *
 * @param {string} name — metric name
 * @param {number} value — observed value
 * @throws when `name` fails validateMetricName
 */
export function recordHistogram(name, value) {
	validateMetricName(name);
	const { help, buckets } = getMetricMeta(name);
	const histogram = getRegistry().histogram(name, help, buckets);
	histogram.observe(value);
}

/**
 * Record cost and token usage for a unit.
 *
 * Adds `cost` to sf_cost_total, the token counts to the per-model token
 * counters, and stores `cost` in the sf_cost_last gauge.
 *
 * @param {string} unitId — unit identifier
 * @param {string} modelId — model identifier
 * @param {number} inputTokens — input token count
 * @param {number} outputTokens — output token count
 * @param {number} cost — cost in USD
 * @param {string} [workMode] — current work mode; added as a label on
 *   sf_cost_total only when non-empty
 */
export function recordCost(
	unitId,
	modelId,
	inputTokens,
	outputTokens,
	cost,
	workMode = "",
) {
	const unitLabels = { unit_id: unitId, model_id: modelId };
	const costLabels = workMode
		? { ...unitLabels, work_mode: workMode }
		: { ...unitLabels };
	const modelLabels = () => ({ model_id: modelId });

	recordCounter("sf_cost_total", costLabels, cost);
	recordCounter("sf_tokens_input_total", modelLabels(), inputTokens);
	recordCounter("sf_tokens_output_total", modelLabels(), outputTokens);
	recordGauge("sf_cost_last", cost, unitLabels);
}

/**
 * Record one tool execution: its duration always, and an error count when
 * it failed.
 *
 * The duration histogram is unlabelled, so `toolName` is only used as a
 * label on the error counter.
 *
 * @param {string} toolName — name of the tool
 * @param {number} durationMs — execution duration in milliseconds
 * @param {boolean} [isError] — whether the execution resulted in an error
 * @param {string} [errorType] — type of error; "unknown" when empty
 */
export function recordToolExecution(
	toolName,
	durationMs,
	isError = false,
	errorType = "",
) {
	recordHistogram("sf_tool_execution_duration_ms", durationMs);
	if (!isError) return;

	const errorLabels = {
		tool_name: toolName,
		error_type: errorType || "unknown",
	};
	recordCounter("sf_tool_errors_total", errorLabels, 1);
}

/**
 * Record one model request: its duration always, and an error count when
 * it failed.
 *
 * The duration histogram is unlabelled, so `modelId` is only used as a
 * label on the error counter.
 *
 * @param {string} modelId — model identifier
 * @param {number} durationMs — request duration in milliseconds
 * @param {boolean} [isError] — whether the request resulted in an error
 * @param {string} [errorType] — type of error; "unknown" when empty
 */
export function recordModelRequest(
	modelId,
	durationMs,
	isError = false,
	errorType = "",
) {
	recordHistogram("sf_model_request_duration_ms", durationMs);
	if (!isError) return;

	const errorLabels = {
		model_id: modelId,
		error_type: errorType || "unknown",
	};
	recordCounter("sf_model_errors_total", errorLabels, 1);
}

/**
 * Record one database operation: its duration always, and an error count
 * when it failed.
 *
 * The duration histogram is unlabelled, so `operation` is only used as a
 * label on the error counter.
 *
 * @param {string} operation — database operation name
 * @param {number} durationMs — query duration in milliseconds
 * @param {boolean} [isError] — whether the operation resulted in an error
 * @param {string} [errorType] — type of error; "unknown" when empty
 */
export function recordDatabaseOperation(
	operation,
	durationMs,
	isError = false,
	errorType = "",
) {
	recordHistogram("sf_database_query_duration_ms", durationMs);
	if (!isError) return;

	const errorLabels = { operation, error_type: errorType || "unknown" };
	recordCounter("sf_database_errors_total", errorLabels, 1);
}

/**
 * Count one system warning in sf_system_warnings_total.
 *
 * @param {string} component — system component that issued the warning
 * @param {string} warningType — type of warning
 */
export function recordSystemWarning(component, warningType) {
	const warningLabels = { component, warning_type: warningType };
	recordCounter("sf_system_warnings_total", warningLabels, 1);
}

/**
 * Update resource usage gauges. Only the fields that are present (not
 * `undefined`) are written; the others keep their previous value.
 *
 * @param {object} resources — resource usage data
 * @param {number} [resources.activeSessions] — number of active sessions
 * @param {number} [resources.activeAgents] — number of active agents
 * @param {number} [resources.concurrentToolCalls] — number of concurrent tool calls
 */
export function updateResourceGauges(resources = {}) {
	const gaugeByField = [
		["activeSessions", "sf_active_sessions_count"],
		["activeAgents", "sf_active_agents_count"],
		["concurrentToolCalls", "sf_concurrent_tool_calls"],
	];
	for (const [field, metricName] of gaugeByField) {
		const reading = resources[field];
		if (reading !== undefined) {
			recordGauge(metricName, reading);
		}
	}
}

/**
 * Render everything currently held in the registry as Prometheus text.
 *
 * @returns {string} the exposition text produced by the registry
 */
export function getMetricsText() {
	const registry = getRegistry();
	return registry.buildText();
}

/**
 * Read the Prometheus text file last written for `basePath`.
 *
 * @param {string} basePath — project root
 * @returns {string|null} file contents, or null when the file does not
 *   exist or cannot be read
 */
export function readMetricsFile(basePath) {
	const promPath = metricsFilePath(basePath);
	try {
		return existsSync(promPath) ? readFileSync(promPath, "utf-8") : null;
	} catch {
		return null;
	}
}

/**
 * Query persisted metrics from the dedicated metrics database, newest
 * first.
 *
 * @param {object} _db — unused; the module's own metrics.db handle is queried
 * @param {string} [sessionId] — session to filter by
 * @param {string} [name] — metric name to filter by
 * @param {number} [limit] — max rows to return
 * @returns {Array} — metric rows; empty when the database is not open or
 *   the query fails (the failure is logged)
 */
export function queryMetrics(_db, sessionId = null, name = null, limit = 1000) {
	if (!_metricsDb) return [];
	try {
		const filters = [];
		const params = [];
		if (sessionId) {
			filters.push(" AND session_id = ?");
			params.push(sessionId);
		}
		if (name) {
			filters.push(" AND name = ?");
			params.push(name);
		}
		const sql = `SELECT * FROM metrics WHERE 1=1${filters.join("")} ORDER BY timestamp DESC LIMIT ?`;
		return _metricsDb.prepare(sql).all(...params, limit);
	} catch (err) {
		logWarning("metrics-central", `Query failed: ${err.message}`);
		return [];
	}
}

// ─── Metric Metadata Registry ───────────────────────────────────────────────

const METRIC_META = {
	// Subagent inheritance
	sf_subagent_dispatch_total: {
		help: "Total subagent dispatch attempts",
		labels: ["work_mode", "permission_profile"],
	},
	sf_subagent_dispatch_blocked: {
		help: "Subagent dispatches blocked by inheritance policy",
		labels: ["reason", "work_mode", "permission_profile"],
	},
	sf_subagent_dispatch_allowed: {
		help: "Subagent dispatches allowed after inheritance check",
		labels: ["work_mode", "permission_profile"],
	},

	// Mode transitions
	sf_mode_transition_total: {
		help: "Total mode transitions",
		labels: ["axis", "from", "to", "reason"],
	},

	// Task frontmatter
	sf_task_created_total: {
		help: "Total tasks created with frontmatter",
		labels: ["risk_level", "mutation_scope"],
	},
	sf_task_parallel_blocked: {
		help: "Tasks blocked from parallel execution by frontmatter",
		labels: ["reason"],
	},

	// Parallel intent
	sf_parallel_intent_declared: {
		help: "Parallel worker intents declared",
		labels: ["milestone_id"],
	},
	sf_parallel_intent_conflict: {
		help: "Parallel intent conflicts detected",
		labels: ["milestone_id"],
	},

	// Remote steering
	sf_remote_steering_applied: {
		help: "Remote steering directives applied",
		labels: ["directive_type", "source"],
	},
	sf_remote_steering_rejected: {
		help: "Remote steering directives rejected (throttle/invalid)",
		labels: ["reason"],
	},

	// Skill eval
	sf_skill_eval_runs_total: {
		help: "Total skill evaluation runs",
		labels: ["skill_name", "passed"],
	},
	sf_skill_eval_duration_ms: {
		help: "Skill evaluation duration in milliseconds",
		buckets: [100, 500, 1000, 5000, 10000, 30000],
	},

	// Cost guard
	sf_cost_guard_blocked: {
		help: "Units blocked by cost guard",
		labels: ["reason", "model_id"],
	},
	sf_cost_guard_hourly_spend: {
		help: "Current hourly spend in USD",
	},

	// Gate runner
	sf_gate_runs_total: {
		help: "Total gate executions",
		labels: ["gate_id", "outcome"],
	},
	sf_gate_latency_ms: {
		help: "Gate execution latency in milliseconds",
		buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000],
	},

	// Message bus
	sf_message_bus_messages_total: {
		help: "Total messages in bus",
		labels: ["agent_id"],
	},
	sf_message_bus_unread_total: {
		help: "Unread messages in bus",
		labels: ["agent_id"],
	},

	// Cost tracking
	sf_cost_total: {
		help: "Total cost in USD",
		labels: ["unit_id", "model_id", "work_mode"],
	},
	sf_tokens_input_total: {
		help: "Total input tokens",
		labels: ["model_id"],
	},
	sf_tokens_output_total: {
		help: "Total output tokens",
		labels: ["model_id"],
	},
	sf_cost_last: {
		help: "Last recorded cost in USD",
		labels: ["unit_id", "model_id"],
	},

	// Performance tracking
	sf_session_start_duration_ms: {
		help: "Session start duration in milliseconds",
		buckets: [100, 250, 500, 1000, 2000, 5000],
	},
	sf_tool_execution_duration_ms: {
		help: "Tool execution duration in milliseconds",
		buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000],
	},
	sf_model_request_duration_ms: {
		help: "Model request duration in milliseconds",
		buckets: [100, 500, 1000, 2500, 5000, 10000, 30000, 60000],
	},
	sf_database_query_duration_ms: {
		help: "Database query duration in milliseconds",
		buckets: [1, 5, 10, 25, 50, 100, 250, 500],
	},

	// Resource usage
	sf_active_sessions_count: {
		help: "Number of active sessions",
	},
	sf_active_agents_count: {
		help: "Number of active agents",
	},
	sf_concurrent_tool_calls: {
		help: "Number of concurrent tool calls",
	},

	// Error tracking
	sf_tool_errors_total: {
		help: "Total tool execution errors",
		labels: ["tool_name", "error_type"],
	},
	sf_model_errors_total: {
		help: "Total model request errors",
		labels: ["model_id", "error_type"],
	},
	sf_database_errors_total: {
		help: "Total database operation errors",
		labels: ["operation", "error_type"],
	},
	sf_system_warnings_total: {
		help: "Total system warnings",
		labels: ["component", "warning_type"],
	},

	// Internal
	sf_metrics_flush_failed_total: {
		help: "Total metrics flush failures",
	},
	sf_metrics_flush_success_total: {
		help: "Total successful metrics flushes",
	},
	sf_metrics_flush_duration_ms: {
		help: "Duration of last metrics flush in milliseconds",
	},
	sf_metrics_system_uptime_seconds: {
		help: "Metrics system uptime in seconds",
	},
	sf_metrics_database_status: {
		help: "Database connection status (1=connected, 0=disconnected)",
		labels: ["project_path"],
	},
};

/**
 * Look up the registered metadata for a metric.
 *
 * Only the table's own entries count: names such as `constructor` or
 * `toString` are valid metric names but would otherwise resolve to members
 * inherited from Object.prototype instead of the default.
 *
 * @param {string} name — metric name
 * @returns {object} the registered entry, or `{ help: name, labels: [] }`
 *   when the name has no (non-null) entry
 */
function getMetricMeta(name) {
	const registered = Object.hasOwn(METRIC_META, name)
		? METRIC_META[name]
		: undefined;
	return registered ?? { help: name, labels: [] };
}

/**
 * Register (or replace) the metadata for a metric.
 *
 * The entry is always created as an own property of the metadata table.
 * A plain assignment would treat the name `__proto__` — which is a valid
 * metric name — as a request to replace the table's prototype rather than
 * to add an entry.
 *
 * @param {string} name — metric name
 * @param {string} help — help text for the metric
 * @param {string[]} [labels] — label names
 * @param {number[]} [buckets] — histogram bucket upper bounds
 */
export function registerMetricMeta(name, help, labels = [], buckets) {
	Object.defineProperty(METRIC_META, name, {
		value: { help, labels, buckets },
		writable: true,
		enumerable: true,
		configurable: true,
	});
}