// singularity-forge/src/resources/extensions/sf/knowledge-injector.js

/**
 * Knowledge Injector — automatically injects relevant learnings into dispatch prompts.
 *
 * Purpose: During milestone planning, query KNOWLEDGE.md for relevant learnings and
 * inject them into execute-task, plan-slice, and other dispatch prompts. This makes
 * accumulated knowledge actionable in future runs instead of inert.
 *
 * Consumer: auto-prompts.js when loading prompts for dispatch.
 *
 * Implementation:
 * 1. Parse KNOWLEDGE.md judgment-log entries
 * 2. Extract key concepts (tags, domains, failure modes)
 * 3. Score keyword overlap between those concepts and the current task context
 * 4. Inject entries that clear the confidence and similarity thresholds (defaults 0.7 and 0.5) into prompt variables
 * 5. Track which knowledge was used (feedback loop)
 */

import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { logWarning } from "./workflow-logger.js";

/**
 * Parse KNOWLEDGE.md and extract judgment-log entries.
 *
 * Format expected:
 * ```
 * ### Judgment Entry: <title>
 * - **Evidence:** <source>
 * - **Confidence:** 0.95
 * - **Domain:** <domain>
 * - **Recommendation:** <action>
 * ```
 *
 * Parsing rules:
 * - LF and CRLF line endings are both accepted.
 * - The title is the rest of the marker line; an entry's body runs until the
 *   next `###` heading or the end of the input.
 * - A field value is read from the label's own line only, so an empty field
 *   yields its fallback instead of swallowing the following line.
 * - Fallbacks: evidence/recommendation `""`, domain `"general"`,
 *   confidence `0.5` (also used when the number cannot be parsed).
 *
 * @param {string} knowledgeContent - Raw markdown text of KNOWLEDGE.md.
 * @returns {Array<{title: string, evidence: string, confidence: number,
 *   domain: string, recommendation: string, body: string}>} Entries in file order.
 */
function parseKnowledgeEntries(knowledgeContent) {
	const text = String(knowledgeContent ?? "");
	if (text === "") return [];

	const DEFAULT_CONFIDENCE = 0.5;
	const DEFAULT_DOMAIN = "general";

	// `[^\r\n]*` is used instead of `.+?\n` because `.` never matches `\r`,
	// which made every CRLF-terminated header fail to match at all.
	const entryPattern =
		/### Judgment Entry:[ \t]*([^\r\n]*)(?:\r?\n|$)([\s\S]*?)(?=###\s|$)/g;

	// Reads the value that follows `- **<label>:**` on the same line.
	// `[ \t]*` (not `\s*`) keeps the match from spilling onto the next line.
	const readField = (body, label) => {
		const fieldPattern = new RegExp(
			`[-*]\\s+\\*?\\*?${label}:\\*?\\*?[ \\t]*([^\\r\\n]*)`,
		);
		const found = body.match(fieldPattern);
		return found ? found[1].trim() : "";
	};

	// Reads the numeric confidence; anything that does not parse to a finite
	// number (e.g. "...") falls back so NaN never reaches the scoring code.
	const readConfidence = (body) => {
		const found = body.match(/[-*]\s+\*?\*?Confidence:\*?\*?[ \t]*([\d.]+)/);
		const value = found ? Number.parseFloat(found[1]) : Number.NaN;
		return Number.isFinite(value) ? value : DEFAULT_CONFIDENCE;
	};

	const entries = [];
	for (const [, rawTitle, rawBody] of text.matchAll(entryPattern)) {
		entries.push({
			title: rawTitle.trim(),
			evidence: readField(rawBody, "Evidence"),
			confidence: readConfidence(rawBody),
			domain: readField(rawBody, "Domain") || DEFAULT_DOMAIN,
			recommendation: readField(rawBody, "Recommendation"),
			body: rawBody.trim(),
		});
	}

	return entries;
}

/**
 * Extract key concepts (domain tag, action/failure phrases, title keywords)
 * from a knowledge entry.
 *
 * Every concept is lowercased — including the domain — so the result can be
 * compared directly against lowercased context keywords.
 *
 * @param {{domain?: string, title?: string, body?: string}} entry - Parsed knowledge entry.
 * @returns {string[]} Unique lowercase concepts, in first-seen order.
 */
function extractConcepts(entry) {
	const concepts = new Set();

	// The domain must be lowercased like everything else; otherwise a domain
	// such as "Testing" can never equal a lowercased context keyword.
	if (entry.domain) concepts.add(String(entry.domain).toLowerCase());

	// Each pattern captures the word that follows a trigger term. For `bug`
	// and `error` the lazy `.*?` means the capture is the next run of word
	// characters after the trigger (possibly a suffix of the same word).
	const phrasePatterns = [
		/avoid\s+(\w+)/gi,
		/use\s+(\w+)/gi,
		/requires?\s+(\w+)/gi,
		/prevents?\s+(\w+)/gi,
		/bug.*?(\w+)/gi,
		/error.*?(\w+)/gi,
	];

	const bodyText = String(entry.body ?? "");
	for (const pattern of phrasePatterns) {
		for (const [, word] of bodyText.matchAll(pattern)) {
			concepts.add(word.toLowerCase());
		}
	}

	// Title words longer than three characters count as keywords.
	for (const word of String(entry.title ?? "").split(/\s+/)) {
		if (word.length > 3) concepts.add(word.toLowerCase());
	}

	return [...concepts];
}

/**
 * Similarity scoring (simple case-insensitive keyword overlap for now).
 *
 * Purpose: Match knowledge entries to current task context.
 *
 * The score is the fraction of knowledge concepts that also appear among the
 * context keywords. Both sides are lowercased before comparison.
 *
 * @param {string[]} knowledgeConcepts - Concepts extracted from one entry.
 * @param {string[]} contextKeywords - Keywords describing the current task.
 * @returns {number} Score in the range 0.0-1.0; 0 when either list is empty.
 */
function semanticSimilarity(knowledgeConcepts, contextKeywords) {
	if (!contextKeywords || contextKeywords.length === 0) return 0;
	if (!knowledgeConcepts || knowledgeConcepts.length === 0) return 0;

	// String() guards against non-string values, which have no toLowerCase.
	const normalize = (value) => String(value).toLowerCase();

	const contextSet = new Set(contextKeywords.map((k) => normalize(k)));
	const matchCount = knowledgeConcepts.filter((c) =>
		contextSet.has(normalize(c)),
	).length;

	return matchCount / knowledgeConcepts.length;
}

/**
 * Select the knowledge entries that are relevant to a task context.
 *
 * An entry is kept when its confidence is not below `minConfidence` and its
 * concept/keyword similarity is at least `minSimilarity`. Each kept entry is
 * given a combined score of 70% confidence and 30% similarity.
 *
 * @param {Iterable<object>} knowledgeEntries - Parsed KNOWLEDGE.md entries.
 * @param {string[]} contextKeywords - Task domain, task type, technology keywords.
 * @param {number} [minConfidence=0.6] - Entries below this confidence are dropped.
 * @param {number} [minSimilarity=0.5] - Entries below this similarity are dropped.
 * @returns {Array<{entry: object, similarity: number, score: number}>}
 *   Matches ordered by descending combined score.
 */
export function findRelevantKnowledge(
	knowledgeEntries,
	contextKeywords,
	minConfidence = 0.6,
	minSimilarity = 0.5,
) {
	const confidenceWeight = 0.7;
	const similarityWeight = 0.3;

	const ranked = [...knowledgeEntries]
		// Confidence gate first, so low-confidence entries are never scored.
		.filter((candidate) => !(candidate.confidence < minConfidence))
		.map((candidate) => {
			const similarity = semanticSimilarity(
				extractConcepts(candidate),
				contextKeywords,
			);
			return {
				entry: candidate,
				similarity,
				score:
					candidate.confidence * confidenceWeight +
					similarity * similarityWeight,
			};
		})
		.filter((scored) => scored.similarity >= minSimilarity);

	// Highest combined score first.
	ranked.sort((left, right) => right.score - left.score);
	return ranked;
}

/**
 * Format knowledge for injection into prompts.
 *
 * Purpose: Convert scored knowledge entries to readable markdown text.
 *
 * The percentage labelled "relevance" in each heading is the item's combined
 * `score`, not its raw similarity.
 *
 * @param {Array<{entry: object, score: number}>} relevantKnowledge - Scored
 *   entries, already ordered by priority.
 * @param {number} [maxEntries=5] - Maximum number of entries to render. Values
 *   that are not positive integers fall back to 5.
 * @returns {string} Markdown block, or "(no relevant knowledge)" when the list
 *   is empty or missing.
 */
function formatKnowledgeForInjection(relevantKnowledge, maxEntries = 5) {
	if (!relevantKnowledge || relevantKnowledge.length === 0) {
		return "(no relevant knowledge)";
	}

	const limit = Number.isInteger(maxEntries) && maxEntries > 0 ? maxEntries : 5;
	const toPercent = (ratio) => (ratio * 100).toFixed(0);

	const sections = relevantKnowledge.slice(0, limit).map(({ entry, score }) =>
		[
			// Leading "\n" leaves a blank line before each entry heading.
			`\n### ${entry.title} [confidence: ${toPercent(entry.confidence)}%, relevance: ${toPercent(score)}%]`,
			`**Domain:** ${entry.domain}`,
			`**Evidence:** ${entry.evidence}`,
			`**Recommendation:** ${entry.recommendation}`,
			`\n${entry.body}`,
		].join("\n"),
	);

	return ["## Relevant Prior Learning", ...sections].join("\n");
}

/**
 * Detect contradictory knowledge entries.
 *
 * Purpose: Flag when knowledge advises conflicting actions (e.g., "use Python 3.12"
 * vs. "avoid Python 3.12") so triage agents can resolve ambiguity.
 *
 * Recommendations are compared after lowercasing and collapsing whitespace.
 * A negative recommendation ("avoid X", "don't X", "don't use X", "do not use X")
 * is rewritten to its positive form "use X"; a conflict is reported when some
 * other entry recommends exactly that positive form.
 *
 * @param {Array<{recommendation?: string}>} knowledgeEntries - Parsed entries.
 * @returns {Array<{type: "direct_conflict", entries: object[],
 *   conflictingEntries: object[]}>} One record per negative recommendation
 *   that has a positive counterpart.
 */
export function detectContradictions(knowledgeEntries) {
	// The trailing `\s+` is consumed together with the negation so the
	// rewritten text is "use x", not "use  x" or "use use x".
	const negation = /\b(?:avoid|(?:don['\u2019]t|do\s+not)(?:\s+use)?)\s+/;
	const normalize = (text) =>
		String(text ?? "")
			.toLowerCase()
			.replace(/\s+/g, " ")
			.trim();

	// Group entries by normalised recommendation text.
	const byRecommendation = new Map();
	for (const entry of knowledgeEntries ?? []) {
		const key = normalize(entry?.recommendation);
		if (key === "") continue; // nothing to compare
		const group = byRecommendation.get(key);
		if (group) {
			group.push(entry);
		} else {
			byRecommendation.set(key, [entry]);
		}
	}

	const contradictions = [];
	for (const [rec, entries] of byRecommendation) {
		if (!negation.test(rec)) continue;

		const positiveForm = rec.replace(negation, "use ");
		const conflictingEntries = byRecommendation.get(positiveForm);
		if (positiveForm !== rec && conflictingEntries) {
			contradictions.push({
				type: "direct_conflict",
				entries,
				conflictingEntries,
			});
		}
	}

	return contradictions;
}

/**
 * Read the project's KNOWLEDGE.md as UTF-8 text.
 *
 * Looks in `<basePath>/.sf/KNOWLEDGE.md` first, then `<basePath>/KNOWLEDGE.md`.
 * A location that exists but cannot be read is skipped.
 *
 * @param {string} basePath - Project root directory.
 * @returns {string|null} File contents, or null when no location could be read.
 */
function loadKnowledgeFile(basePath) {
	const searchOrder = [[".sf", "KNOWLEDGE.md"], ["KNOWLEDGE.md"]];

	for (const segments of searchOrder) {
		const candidatePath = join(basePath, ...segments);
		if (!existsSync(candidatePath)) continue;

		try {
			return readFileSync(candidatePath, "utf-8");
		} catch {
			// Best effort: fall through to the next location.
		}
	}

	return null;
}

/**
 * Main API: Inject knowledge into prompt variables.
 *
 * Purpose: This is called by auto-prompts.js when loading prompts, to add
 * {{knowledgeInjection}} variables automatically.
 *
 * Steps: load KNOWLEDGE.md, parse its entries, build the context keyword list,
 * select relevant entries, log a warning if contradictory entries exist, and
 * format the selection.
 *
 * @param {string} basePath - Project root.
 * @param {object} [taskContext] - Context for matching.
 * @param {string} [taskContext.domain] - Task domain.
 * @param {string} [taskContext.taskType] - Task type.
 * @param {string|string[]} [taskContext.keywords] - Extra keywords.
 * @param {string|string[]} [taskContext.technology] - Technology stack keywords.
 * @param {object} [options]
 * @param {number} [options.minConfidence=0.7] - Minimum entry confidence.
 * @param {number} [options.minSimilarity=0.5] - Minimum similarity score.
 * @param {number} [options.maxEntries] - Upper bound on injected entries
 *   (positive integer). The formatter may apply its own cap as well.
 * @returns {string} Formatted text suitable for prompt variable substitution,
 *   or a parenthesised placeholder when nothing can be injected.
 */
export function injectKnowledgeIntPrompt(
	basePath,
	taskContext = {},
	options = {},
) {
	// Default parameters only cover `undefined`; tolerate explicit null too.
	const context = taskContext ?? {};
	const settings = options ?? {};

	const knowledgeContent = loadKnowledgeFile(basePath);
	if (!knowledgeContent) {
		return "(knowledge base unavailable)";
	}

	const entries = parseKnowledgeEntries(knowledgeContent);
	if (entries.length === 0) {
		return "(no knowledge entries found)";
	}

	// A bare string must be wrapped before flattening: spreading it directly
	// would produce one "keyword" per character.
	const toList = (value) => (Array.isArray(value) ? value : [value]);
	const contextKeywords = [
		context.domain,
		context.taskType,
		...toList(context.keywords),
		...toList(context.technology),
	].filter((keyword) => typeof keyword === "string" && keyword !== "");

	// Find relevant knowledge
	const minConfidence = settings.minConfidence ?? 0.7;
	const minSimilarity = settings.minSimilarity ?? 0.5;
	const relevant = findRelevantKnowledge(
		entries,
		contextKeywords,
		minConfidence,
		minSimilarity,
	);

	// Check for contradictions (log warning if found)
	const contradictions = detectContradictions(entries);
	if (contradictions.length > 0) {
		logWarning(
			"knowledge-injector",
			`${contradictions.length} contradictory knowledge entries detected`,
		);
	}

	// Honor options.maxEntries: trim here and forward the limit; the
	// formatter may apply its own cap as well.
	const { maxEntries } = settings;
	const hasLimit = Number.isInteger(maxEntries) && maxEntries > 0;
	const selected = hasLimit ? relevant.slice(0, maxEntries) : relevant;

	return formatKnowledgeForInjection(
		selected,
		hasLimit ? maxEntries : undefined,
	);
}

/**
 * Build a usage record for the knowledge feedback loop.
 *
 * Nothing is persisted yet: writing to `.sf/knowledge-usage.jsonl` is deferred
 * to the feedback-loop integration, so `_basePath` is currently unused.
 *
 * @param {string} _basePath - Project root (unused for now).
 * @param {string} taskId - Identifier of the dispatched task.
 * @param {{length: number}} injectedKnowledge - What was injected; only its
 *   `length` is read.
 * @returns {{taskId: string, injectedCount: number, timestamp: string}}
 *   Record stamped with the current time in ISO 8601 format.
 */
export function trackKnowledgeUsage(_basePath, taskId, injectedKnowledge) {
	const injectedCount = injectedKnowledge.length;
	const timestamp = new Date().toISOString();

	return { taskId, injectedCount, timestamp };
}

// Default export: a single object bundling the named exports together with
// the module-private helpers (parseKnowledgeEntries, extractConcepts,
// semanticSimilarity, formatKnowledgeForInjection, loadKnowledgeFile).
// NOTE(review): presumably the helpers are exposed here so tests can reach
// them without separate named exports — confirm against the consumers.
export default {
	injectKnowledgeIntPrompt,
	findRelevantKnowledge,
	detectContradictions,
	parseKnowledgeEntries,
	extractConcepts,
	semanticSimilarity,
	formatKnowledgeForInjection,
	loadKnowledgeFile,
	trackKnowledgeUsage,
};