1// Boundary Job
2//
3// Runs on a configurable interval. For each tree with autoBoundary enabled,
4// re-analyzes structural cohesion if the previous report is marked stale.
5//
6// Trees opt in via metadata.boundary.autoBoundary = true on the root.
7// Default off. Trees don't need boundary analysis on a schedule unless
8// the operator decides they do.
9
10import log from "../../seed/log.js";
11import { analyze } from "./core.js";
12
// Injected dependencies; each stays null until its setter below is called.
let Node = null;
let User = null;
let _metadata = null;

/**
 * Store the Node and User models the job queries.
 * @param {{ Node: object, User: object }} models - object exposing both models
 */
export function setModels(models) {
  ({ Node, User } = models);
}

/**
 * Store the metadata helper used to read per-extension metadata.
 * @param {object} metadata - helper exposing getExtMeta
 */
export function setMetadata(metadata) {
  _metadata = metadata;
}
18
19let _timer = null;
20
// Default cadence between boundary cycles: one week.
const DEFAULT_BOUNDARY_INTERVAL_MS = 7 * 24 * 60 * 60 * 1000;
// Node.js timers coerce delays above this value (or below 1) to 1 ms.
const MAX_BOUNDARY_TIMER_DELAY_MS = 2 ** 31 - 1;

/**
 * Resolve the delay between boundary cycles.
 *
 * Reads "boundaryIntervalMs" from the land config. Falls back to the weekly
 * default when the config module cannot be loaded, the lookup throws, or the
 * value is not a finite number >= 1. Values above the largest delay a Node.js
 * timer supports are clamped to that maximum, because setInterval would
 * otherwise turn them into a 1 ms interval.
 *
 * @returns {Promise<number>} interval in milliseconds, safe to pass to setInterval
 */
async function getIntervalMs() {
  let configured;
  try {
    const { getLandConfigValue } = await import("../../seed/landConfig.js");
    configured = Number(getLandConfigValue("boundaryIntervalMs"));
  } catch {
    // Deliberate best-effort: a missing or broken config must not stop the job.
    return DEFAULT_BOUNDARY_INTERVAL_MS;
  }

  if (!Number.isFinite(configured) || configured < 1) {
    return DEFAULT_BOUNDARY_INTERVAL_MS;
  }
  return Math.min(configured, MAX_BOUNDARY_TIMER_DELAY_MS);
}
29
/**
 * Start the recurring boundary cycle. Idempotent: does nothing when a timer
 * already exists.
 *
 * The interval lookup is asynchronous, so the timer guard is checked again
 * after the await. Without that second check two overlapping calls would each
 * create a timer and the first one could never be cleared.
 *
 * @returns {Promise<void>}
 */
export async function startBoundaryJob() {
  if (_timer) return;
  const interval = await getIntervalMs();
  // A concurrent start may have installed the timer while we were awaiting.
  if (_timer) return;

  _timer = setInterval(runBoundaryCycle, interval);
  // Do not keep the process alive just for this job.
  if (_timer.unref) _timer.unref();

  // Report days when the interval is at least a day; otherwise minutes, so
  // short intervals are not logged as "0d".
  const days = interval / (24 * 60 * 60 * 1000);
  const cadence = days >= 1
    ? `${Math.round(days)}d`
    : `${Math.max(1, Math.round(interval / (60 * 1000)))}m`;
  log.info("Boundary", `Boundary job started (checking every ${cadence})`);
}
37
/**
 * Stop the recurring boundary cycle. Safe to call when no timer is running.
 */
export function stopBoundaryJob() {
  if (!_timer) return;
  clearInterval(_timer);
  _timer = null;
}
44
/**
 * One pass of the boundary job.
 *
 * Loads every node that has a rootOwner other than null/"SYSTEM" and has
 * metadata.boundary.autoBoundary set to true, then re-analyzes each one whose
 * boundary metadata is stale or has never been analyzed. A failure on one
 * tree is logged as a warning and does not stop the remaining trees; a
 * failure of the cycle itself is logged as an error. Never throws.
 */
async function runBoundaryCycle() {
  // Handles a single opted-in root; returns early whenever no analysis is due.
  const processRoot = async (root) => {
    const boundaryMeta = _metadata.getExtMeta(root, "boundary");
    if (boundaryMeta.paused) return;

    // Fresh report (analyzed and not stale): nothing to do.
    const upToDate = !boundaryMeta.stale && boundaryMeta.lastAnalysis;
    if (upToDate) return;

    const userId = root.rootOwner?.toString();
    if (!userId) return;

    const owner = await User.findById(userId).select("username").lean();
    if (!owner) return;

    log.verbose("Boundary", `Auto-analyzing boundary for tree ${root.name}`);
    await analyze(root._id.toString(), userId, owner.username);
  };

  try {
    const optedInFilter = {
      rootOwner: { $nin: [null, "SYSTEM"] },
      "metadata.boundary.autoBoundary": true,
    };
    const roots = await Node.find(optedInFilter)
      .select("_id name rootOwner metadata")
      .lean();

    if (roots.length === 0) return;

    log.verbose("Boundary", `Boundary cycle: ${roots.length} tree(s) opted in for auto-analysis`);

    for (const root of roots) {
      try {
        await processRoot(root);
      } catch (err) {
        log.warn("Boundary", `Auto-boundary failed for tree ${root.name}: ${err.message}`);
      }
    }
  } catch (err) {
    log.error("Boundary", `Boundary cycle error: ${err.message}`);
  }
}
81
1// Boundary Core
2//
3// Structural cohesion analysis. Five stages:
4// 1. Build branch profiles: extract topic + keywords per branch via LLM
5// 2. Build similarity matrix: pairwise branch comparison (embed vectors or LLM fallback)
6// 3. Detect patterns: blurred boundaries, fragmented concepts, orphaned nodes
7// 4. Analyze: full tree analysis, write report to metadata
8// 5. AnalyzeBranch: subtree variant, scoped and cheaper
9//
10// Orphan detection degrades gracefully:
11// - Embed installed: per-node cosine math, any branch size
12// - No embed, branch <= 20 nodes: LLM batch query
13// - No embed, branch > 20 nodes: skipped, report notes the gap
14
15import log from "../../seed/log.js";
16import { parseJsonSafe } from "../../seed/orchestrators/helpers.js";
17import { getExtension } from "../loader.js";
18
// Injected services; populated by setServices before any analysis runs.
let Node = null;
let Note = null;
let logContribution = null;
let runChat = null;
// No-op energy accounting used until an energy service is injected.
let useEnergy = async () => ({ energyUsed: 0 });
let _metadata = null;

/**
 * Wire the core services into this module.
 *
 * models, contributions and llm are required. energy and metadata are
 * optional: when they are absent the current useEnergy / _metadata values
 * are left unchanged.
 *
 * @param {object} services
 * @param {{ Node: object, Note: object }} services.models
 * @param {{ logContribution: Function }} services.contributions
 * @param {{ runChat: Function }} services.llm
 * @param {{ useEnergy?: Function } | null} [services.energy]
 * @param {object} [services.metadata]
 */
export function setServices({ models, contributions, llm, energy, metadata }) {
  ({ Node, Note } = models);
  ({ logContribution } = contributions);
  ({ runChat } = llm);

  const injectedUseEnergy = energy?.useEnergy;
  if (injectedUseEnergy) useEnergy = injectedUseEnergy;

  if (metadata) _metadata = metadata;
}
34
35// ─────────────────────────────────────────────────────────────────────────
36// CONSTANTS
37// ─────────────────────────────────────────────────────────────────────────
38
// Stage 1: at most this many branches are profiled (largest by descendant count).
const MAX_BRANCHES = 15;
// Stage 1: stop sampling note snippets for a branch once this many characters are collected.
const MAX_NOTE_CHARS_PER_BRANCH = 4000;
// Stage 1: the note query for a branch is limited to (branch node count * this value).
const MAX_NOTES_PER_NODE = 5;

// Stage 3: two branches at or above this similarity are reported as "blurred".
const BLURRED_THRESHOLD = 0.7;
// Stage 3: blurred pairs at or above this similarity are rated "high" severity.
const BLURRED_HIGH_THRESHOLD = 0.85;
// Stage 3 (embed path): nodes below this similarity to their branch are reported as orphaned.
const ORPHAN_THRESHOLD = 0.35;
// Stage 3: a keyword shared by at least this many branches is reported as "fragmented".
const FRAGMENTED_MIN_BRANCHES = 3;
// Stage 3: the findings list is truncated to this length after sorting.
const MAX_FINDINGS = 50;
// Stage 3 (LLM path): largest branch, in child nodes, that is sent to the LLM for orphan detection.
const ORPHAN_LLM_NODE_LIMIT = 20;
// Stage 3 (embed path): at most this many child nodes per branch are checked.
const MAX_ORPHAN_NODES_PER_BRANCH = 50;
49
50// ─────────────────────────────────────────────────────────────────────────
51// COSINE SIMILARITY (local, no dependency on embed)
52// ─────────────────────────────────────────────────────────────────────────
53
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector is missing,
 *   the lengths differ, or either vector has zero magnitude
 */
function cosineSimilarity(a, b) {
  if (!a || !b) return 0;
  if (a.length !== b.length) return 0;

  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (let idx = 0; idx < a.length; idx += 1) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    squaredNormA += x * x;
    squaredNormB += y * y;
  }

  const magnitude = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
  if (magnitude === 0) return 0;
  return dotProduct / magnitude;
}
65
66// ─────────────────────────────────────────────────────────────────────────
67// STAGE 1: BUILD BRANCH PROFILES
68// ─────────────────────────────────────────────────────────────────────────
69
// Prompt for stage 1. Placeholders: {branchName}, {nodeNames}, {contentSamples}.
const TOPIC_PROMPT = `You are analyzing a branch of a tree to determine what it is about.

Branch root: "{branchName}"
Node names in this branch: {nodeNames}

Content samples from notes in this branch:
{contentSamples}

What is this branch about? Respond with JSON only:
{
  "topic": "one sentence description of the branch's subject",
  "keywords": ["keyword1", "keyword2", "keyword3"]
}`;

/**
 * Stage 1: build one profile per branch of the analysis root.
 *
 * Each direct child of rootId is a branch. For every branch this collects the
 * names of its nodes, recent text-note snippets and (when the "tree-compress"
 * extension is present) compressed essences, then asks the LLM for a topic
 * and keywords. When the LLM call fails or returns no usable topic, the
 * branch name is used as the topic and its lower-cased form as the only
 * keyword. At most MAX_BRANCHES branches are profiled; when there are more,
 * the ones with the most descendants are kept.
 *
 * @param {string} rootId - node whose direct children are treated as branches
 * @param {string} userId - user on whose behalf the LLM is called
 * @param {string} username
 * @returns {Promise<{ profiles: Map<string, object>, allNodes: object[] }>}
 *   profiles maps branch id -> { branchName, topic, keywords, nodeCount, nodeIds }
 *   (keywords are strings only, at most five); allNodes is every node returned
 *   by the tree query
 */
async function buildBranchProfiles(rootId, userId, username) {
  // NOTE(review): analyzeBranch calls this with a non-root node id, so both the
  // rootOwner filter below and runChat's rootId receive the subtree node rather
  // than the tree root -- confirm against the Node schema that this is intended.
  const allNodes = await Node.find({
    rootOwner: rootId,
    status: { $ne: "trimmed" },
    systemRole: { $eq: null },
  })
    .select("_id name parent children metadata")
    .lean();

  if (allNodes.length === 0) return { profiles: new Map(), allNodes: [] };

  const nodeMap = new Map();
  for (const n of allNodes) nodeMap.set(n._id.toString(), n);

  // The root's direct children are the branches.
  const root = await Node.findById(rootId).select("children").lean();
  if (!root || !root.children) return { profiles: new Map(), allNodes };

  let branchRoots = root.children
    .map((id) => nodeMap.get(id.toString()))
    .filter((n) => n && !n.systemRole);

  // Too many branches: keep the largest by descendant count.
  if (branchRoots.length > MAX_BRANCHES) {
    const withCounts = branchRoots.map((br) => ({
      node: br,
      count: countDescendants(br._id.toString(), nodeMap),
    }));
    withCounts.sort((a, b) => b.count - a.count);
    branchRoots = withCounts.slice(0, MAX_BRANCHES).map((w) => w.node);
  }

  const profiles = new Map();

  for (const br of branchRoots) {
    const brId = br._id.toString();
    const descendants = collectDescendants(brId, nodeMap);
    const nodeNames = descendants.map((id) => nodeMap.get(id)?.name).filter(Boolean);

    // Most recent text notes across the whole branch.
    const notes = await Note.find({
      nodeId: { $in: descendants },
      contentType: "text",
    })
      .sort({ dateCreated: -1 })
      .select("content nodeId")
      .limit(descendants.length * MAX_NOTES_PER_NODE)
      .lean();

    let contentChars = 0;
    const samples = [];
    for (const note of notes) {
      if (contentChars >= MAX_NOTE_CHARS_PER_BRANCH) break;
      const snippet = (note.content || "").slice(0, 500);
      samples.push(snippet);
      contentChars += snippet.length;
    }

    // Compressed essences, when the tree-compress extension is installed.
    const compressExt = getExtension("tree-compress");
    if (compressExt) {
      for (const nodeId of descendants.slice(0, 10)) {
        const node = nodeMap.get(nodeId);
        if (!node) continue;
        const compressMeta = node.metadata instanceof Map
          ? node.metadata.get("compress")
          : node.metadata?.compress;
        if (compressMeta?.essence) {
          const essenceText = typeof compressMeta.essence === "string"
            ? compressMeta.essence
            : JSON.stringify(compressMeta.essence);
          samples.push(`[Compressed essence] ${essenceText.slice(0, 300)}`);
        }
      }
    }

    // Fill all placeholders in one pass with a function replacer. A string
    // replacement would interpret "$&", "$'", "$$" etc. inside user content,
    // and sequential replaces could substitute into already-inserted text.
    const promptValues = {
      branchName: br.name,
      nodeNames: nodeNames.join(", "),
      contentSamples: samples.join("\n---\n") || "(no notes yet)",
    };
    const prompt = TOPIC_PROMPT.replace(
      /\{(branchName|nodeNames|contentSamples)\}/g,
      (_match, key) => promptValues[key],
    );

    // Start from the fallback profile; upgrade it when the LLM answers usably.
    const profile = {
      branchName: br.name,
      topic: br.name,
      keywords: [String(br.name ?? "").toLowerCase()],
      nodeCount: descendants.length,
      nodeIds: descendants,
    };

    try {
      const result = await runChat({
        userId,
        username,
        message: prompt,
        mode: "tree:respond",
        rootId,
        slot: "boundary",
      });

      const parsed = parseJsonSafe(result?.answer);
      if (parsed && typeof parsed.topic === "string" && parsed.topic.trim()) {
        profile.topic = parsed.topic;
        // Keep only string keywords: later stages call string methods on them.
        profile.keywords = Array.isArray(parsed.keywords)
          ? parsed.keywords.filter((kw) => typeof kw === "string").slice(0, 5)
          : [];
      }
    } catch (err) {
      log.debug("Boundary", `Topic extraction failed for branch ${br.name}: ${err.message}`);
    }

    profiles.set(brId, profile);
  }

  return { profiles, allNodes };
}
216
/**
 * Count the distinct nodes reachable from nodeId through `children` links,
 * including nodeId itself. Ids that are referenced but missing from nodeMap
 * are still counted; cycles are visited once.
 *
 * @param {string} nodeId
 * @param {Map<string, { children?: Array }>} nodeMap - id -> node
 * @returns {number}
 */
function countDescendants(nodeId, nodeMap) {
  const seen = new Set([nodeId]);
  const pending = [nodeId];

  while (pending.length > 0) {
    const current = nodeMap.get(pending.pop());
    for (const child of current?.children || []) {
      const childId = child.toString();
      if (seen.has(childId)) continue;
      seen.add(childId);
      pending.push(childId);
    }
  }

  return seen.size;
}
235
/**
 * List the ids reachable from nodeId through `children` links, starting with
 * nodeId itself. Traversal is depth-first using a stack, so the last child of
 * a node is visited first. Each id appears once, in the order it was first
 * visited; ids missing from nodeMap are still listed.
 *
 * @param {string} nodeId
 * @param {Map<string, { children?: Array }>} nodeMap - id -> node
 * @returns {string[]}
 */
function collectDescendants(nodeId, nodeMap) {
  // A Set keeps insertion order, so it serves as both visited-set and result.
  const visitedInOrder = new Set();
  const pending = [nodeId];

  while (pending.length > 0) {
    const current = pending.pop();
    if (visitedInOrder.has(current)) continue;
    visitedInOrder.add(current);

    const children = nodeMap.get(current)?.children;
    if (!children) continue;
    for (const child of children) {
      pending.push(child.toString());
    }
  }

  return [...visitedInOrder];
}
254
255// ─────────────────────────────────────────────────────────────────────────
256// STAGE 2: BUILD SIMILARITY MATRIX
257// ─────────────────────────────────────────────────────────────────────────
258
// Prompt for the stage 2 LLM fallback. Placeholder: {branchList}.
const SIMILARITY_PROMPT = `Rate the semantic similarity between each pair of branch topics on a scale from 0.0 to 1.0.
0.0 means completely unrelated. 1.0 means identical topics.

Branches:
{branchList}

Return a JSON array of objects, one per pair:
[
  { "a": "branch_id_1", "b": "branch_id_2", "similarity": 0.75 }
]

Only include pairs. Do not include self-comparisons.`;

/**
 * Stage 2: build a symmetric branch-to-branch similarity matrix.
 *
 * When the "embed" extension exposes generateEmbedding and reports coverage
 * of at least 0.5, each branch profile is embedded and pairs are compared by
 * cosine similarity. Otherwise a single LLM call rates all pairs. Pairs that
 * could not be rated stay at 0; the diagonal is always 1.
 *
 * @param {Map<string, object>} profiles - branch id -> profile from stage 1
 * @param {string} userId
 * @param {string} username
 * @param {string} rootId - tree root passed to the LLM call
 * @returns {Promise<{ matrix: number[][], branchIds: string[], embeddings: Map<string, number[]> | null }>}
 *   matrix is indexed like branchIds (empty when there are fewer than two
 *   branches); embeddings is null unless the vector path was taken
 */
async function buildSimilarityMatrix(profiles, userId, username, rootId) {
  const branchIds = [...profiles.keys()];
  const n = branchIds.length;

  if (n < 2) {
    return { matrix: [], branchIds, embeddings: null };
  }

  // Use embeddings only when the extension is present with enough coverage.
  const embedExt = getExtension("embed");
  let useEmbeddings = false;
  let embeddings = null;

  if (embedExt?.exports?.generateEmbedding) {
    try {
      const status = await embedExt.exports.getEmbedStatus?.();
      useEmbeddings = (status?.coverage ?? 0) >= 0.5;
    } catch (err) {
      log.debug("Boundary", "Embed status check failed:", err.message);
    }
  }

  // n x n matrix, zero everywhere except the diagonal.
  const matrix = Array.from({ length: n }, () => Array(n).fill(0));
  for (let i = 0; i < n; i++) matrix[i][i] = 1.0;

  if (useEmbeddings) {
    // Vector path: one embedding per branch profile.
    embeddings = new Map();
    for (const [brId, profile] of profiles) {
      const text = `${profile.topic}. Keywords: ${profile.keywords.join(", ")}`;
      try {
        const vector = await embedExt.exports.generateEmbedding(text, userId);
        if (vector) embeddings.set(brId, vector);
      } catch (err) {
        log.debug("Boundary", `Embedding failed for branch ${profile.branchName}: ${err.message}`);
      }
    }

    for (let i = 0; i < n; i++) {
      for (let j = i + 1; j < n; j++) {
        const vecA = embeddings.get(branchIds[i]);
        const vecB = embeddings.get(branchIds[j]);
        if (vecA && vecB) {
          const sim = cosineSimilarity(vecA, vecB);
          matrix[i][j] = sim;
          matrix[j][i] = sim;
        }
      }
    }
  } else {
    // LLM fallback: rate every pair in one request.
    const branchList = branchIds.map((id) => {
      const p = profiles.get(id);
      return `${id}: "${p.branchName}" -- ${p.topic}`;
    }).join("\n");

    // Function replacer: branch names and topics may contain "$&", "$'" or
    // "$$", which a string replacement would treat as substitution patterns.
    const prompt = SIMILARITY_PROMPT.replace("{branchList}", () => branchList);

    try {
      const result = await runChat({
        userId,
        username,
        message: prompt,
        mode: "tree:respond",
        rootId,
        slot: "boundary",
      });

      const parsed = parseJsonSafe(result?.answer);
      if (Array.isArray(parsed)) {
        const indexMap = new Map(branchIds.map((id, idx) => [id, idx]));

        for (const pair of parsed) {
          // Skip malformed entries instead of letting one abort the whole batch.
          if (!pair || typeof pair !== "object") continue;
          const iA = indexMap.get(pair.a);
          const iB = indexMap.get(pair.b);
          const sim = Number(pair.similarity);
          if (iA == null || iB == null || !Number.isFinite(sim)) continue;
          // A self-pair must not overwrite the diagonal.
          if (iA === iB) continue;

          const clamped = Math.max(0, Math.min(1, sim));
          matrix[iA][iB] = clamped;
          matrix[iB][iA] = clamped;
        }
      }
    } catch (err) {
      log.warn("Boundary", `Similarity matrix LLM call failed: ${err.message}`);
    }
  }

  return { matrix, branchIds, embeddings };
}
367
368// ─────────────────────────────────────────────────────────────────────────
369// STAGE 3: DETECT PATTERNS
370// ─────────────────────────────────────────────────────────────────────────
371
// Prompt for LLM-based orphan detection. Placeholders: {branchTopic}, {nodeList}.
const ORPHAN_PROMPT = `You are analyzing nodes in a branch about: "{branchTopic}"

Which of these nodes do NOT belong in this branch? A node doesn't belong if its content is about a completely different subject.

Nodes:
{nodeList}

Return a JSON array of objects for nodes that don't belong:
[
  { "nodeId": "...", "reason": "why it doesn't belong" }
]

If all nodes belong, return: []`;

/**
 * Stage 3: derive findings from the branch profiles and similarity matrix.
 *
 * Produces three kinds of findings:
 *  - "blurred": two branches whose similarity is >= BLURRED_THRESHOLD
 *  - "fragmented": a keyword shared by >= FRAGMENTED_MIN_BRANCHES distinct branches
 *  - "orphaned": a node that does not fit its branch
 *
 * Orphan detection per branch:
 *  - embed installed and a branch vector exists: cosine similarity between the
 *    branch vector and each node's most recent embedded note
 *  - otherwise, branch with <= ORPHAN_LLM_NODE_LIMIT child nodes: one LLM call
 *  - otherwise: skipped, recorded in `degraded`, with coherence estimated
 *    from the similarity matrix
 *
 * @param {Map<string, object>} profiles - branch id -> profile from stage 1
 * @param {number[][]} matrix - similarity matrix indexed like branchIds
 * @param {string[]} branchIds
 * @param {object[]} allNodes - nodes used to resolve names for findings
 * @param {string} userId
 * @param {string} username
 * @param {string} rootId - tree root passed to LLM calls
 * @param {Map<string, number[]> | null} embeddings - branch vectors from stage 2, if any
 * @returns {Promise<{ findings: object[], branches: object, overallCoherence: number, degraded: string[] }>}
 *   findings are sorted high severity first and capped at MAX_FINDINGS;
 *   overallCoherence is the node-count-weighted mean of branch coherence
 */
async function detectPatterns(profiles, matrix, branchIds, allNodes, userId, username, rootId, embeddings) {
  const findings = [];
  const degraded = [];
  const branchSummaries = {};
  const nodeMap = new Map();
  for (const node of allNodes) nodeMap.set(node._id.toString(), node);

  const embedExt = getExtension("embed");
  const hasEmbed = Boolean(embedExt?.exports?.generateEmbedding);
  const round2 = (value) => Math.round(value * 100) / 100;

  // ── Blurred boundaries ──────────────────────────────────────────────
  const n = branchIds.length;
  for (let i = 0; i < n; i++) {
    for (let j = i + 1; j < n; j++) {
      const sim = matrix[i][j];
      if (!(sim >= BLURRED_THRESHOLD)) continue;

      const profileA = profiles.get(branchIds[i]);
      const profileB = profiles.get(branchIds[j]);
      findings.push({
        type: "blurred",
        severity: sim >= BLURRED_HIGH_THRESHOLD ? "high" : "moderate",
        description:
          `"${profileA.branchName}" and "${profileB.branchName}" overlap significantly. ` +
          `Both cover: ${profileA.topic}`,
        nodes: [],
        branches: [branchIds[i], branchIds[j]],
        similarity: round2(sim),
        suggestion:
          `Consider consolidating overlapping content between "${profileA.branchName}" and "${profileB.branchName}"`,
      });
    }
  }

  // ── Fragmented concepts ─────────────────────────────────────────────
  // keyword -> Set of branch ids. A Set so that a keyword repeated inside one
  // branch does not count that branch more than once.
  const keywordToBranches = new Map();
  for (const [brId, profile] of profiles) {
    for (const kw of profile.keywords || []) {
      // LLM output is not guaranteed to contain only strings.
      if (typeof kw !== "string") continue;
      const normalized = kw.toLowerCase().trim();
      if (!normalized) continue;
      if (!keywordToBranches.has(normalized)) keywordToBranches.set(normalized, new Set());
      keywordToBranches.get(normalized).add(brId);
    }
  }

  for (const [keyword, branchSet] of keywordToBranches) {
    if (branchSet.size < FRAGMENTED_MIN_BRANCHES) continue;
    const branches = [...branchSet];
    const branchNames = branches.map((id) => `"${profiles.get(id)?.branchName}"`).join(", ");
    findings.push({
      type: "fragmented",
      severity: branches.length >= 5 ? "high" : "moderate",
      description:
        `The concept "${keyword}" appears across ${branches.length} branches: ${branchNames}`,
      nodes: [],
      branches,
      similarity: null,
      suggestion:
        `Consider consolidating "${keyword}" content into a single branch`,
    });
  }

  // ── Orphaned nodes ──────────────────────────────────────────────────
  for (const [brId, profile] of profiles) {
    const nodeIds = profile.nodeIds || [];
    const summary = {
      topic: profile.topic,
      coherence: 1.0, // default, refined below
      nodeCount: profile.nodeCount,
      keywords: profile.keywords,
      orphanSkipped: false,
    };
    branchSummaries[brId] = summary;

    // The branch root itself is never an orphan candidate.
    const childNodeIds = nodeIds.filter((id) => id !== brId);
    if (childNodeIds.length === 0) continue;

    // The embed path needs a branch vector. Stage 2 only produces vectors when
    // embed coverage is sufficient, so an installed extension alone is not
    // enough: without a vector fall through to the LLM / skipped paths rather
    // than silently doing nothing.
    const branchVector = hasEmbed ? embeddings?.get(brId) : undefined;

    if (branchVector) {
      // Embed path: per-node cosine similarity against the branch vector.
      const cappedIds = childNodeIds.slice(0, MAX_ORPHAN_NODES_PER_BRANCH);
      const similarities = [];

      for (const nodeId of cappedIds) {
        try {
          // Most recent text note of this node that has an embedding.
          const note = await Note.findOne({
            nodeId,
            contentType: "text",
            "metadata.embed.vector": { $exists: true },
          })
            .sort({ dateCreated: -1 })
            .select("metadata")
            .lean();

          if (!note) continue;

          const vector = note.metadata instanceof Map
            ? note.metadata.get("embed")?.vector
            : note.metadata?.embed?.vector;

          if (!vector) continue;

          const sim = cosineSimilarity(vector, branchVector);
          similarities.push(sim);

          if (sim < ORPHAN_THRESHOLD) {
            const nodeName = nodeMap.get(nodeId)?.name || nodeId;
            findings.push({
              type: "orphaned",
              severity: sim < 0.20 ? "high" : "moderate",
              description:
                `"${nodeName}" in branch "${profile.branchName}" has low semantic similarity ` +
                `(${round2(sim)}) to the branch topic: ${profile.topic}`,
              nodes: [nodeId],
              branches: [brId],
              similarity: round2(sim),
              suggestion:
                `Consider moving "${nodeName}" to a more relevant branch`,
            });
          }
        } catch (err) {
          log.debug("Boundary", "Orphan embed lookup failed for node:", err.message);
        }
      }

      // Branch coherence: mean node-to-branch similarity.
      if (similarities.length > 0) {
        const avg = similarities.reduce((a, b) => a + b, 0) / similarities.length;
        summary.coherence = round2(avg);
      }

    } else if (childNodeIds.length <= ORPHAN_LLM_NODE_LIMIT) {
      // LLM path for small branches.
      const nodeList = [];
      for (const nodeId of childNodeIds) {
        const nodeName = nodeMap.get(nodeId)?.name || nodeId;
        const note = await Note.findOne({ nodeId, contentType: "text" })
          .sort({ dateCreated: -1 })
          .select("content")
          .lean();
        const preview = note?.content?.slice(0, 200) || "(no content)";
        nodeList.push(`${nodeId}: "${nodeName}" -- ${preview}`);
      }

      // Function replacer in a single pass: topics and note previews may
      // contain "$&", "$'" or "$$", which a string replacement would expand.
      const promptValues = {
        branchTopic: profile.topic,
        nodeList: nodeList.join("\n"),
      };
      const prompt = ORPHAN_PROMPT.replace(
        /\{(branchTopic|nodeList)\}/g,
        (_match, key) => promptValues[key],
      );

      try {
        const result = await runChat({
          userId,
          username,
          message: prompt,
          mode: "tree:respond",
          rootId,
          slot: "boundary",
        });

        const parsed = parseJsonSafe(result?.answer);
        if (Array.isArray(parsed)) {
          // Accept only ids that really belong to this branch, once each.
          // Unknown ids, duplicates or null entries would otherwise create
          // bogus findings and could push coherence below zero.
          const candidateIds = new Set(childNodeIds);
          const flagged = new Set();

          for (const orphan of parsed) {
            const orphanId = orphan?.nodeId != null ? String(orphan.nodeId) : "";
            if (!orphanId || !candidateIds.has(orphanId) || flagged.has(orphanId)) continue;
            flagged.add(orphanId);

            const nodeName = nodeMap.get(orphanId)?.name || orphanId;
            findings.push({
              type: "orphaned",
              severity: "moderate",
              description:
                `"${nodeName}" in branch "${profile.branchName}" may not belong. ` +
                `${orphan.reason || "Content does not match branch topic."}`,
              nodes: [orphanId],
              branches: [brId],
              similarity: null,
              suggestion:
                `Consider moving "${nodeName}" to a more relevant branch`,
            });
          }

          // Rough coherence: proportion of nodes that were not flagged.
          const totalNodes = childNodeIds.length;
          summary.coherence = round2((totalNodes - flagged.size) / totalNodes);
        }
      } catch (err) {
        log.debug("Boundary", `Orphan detection failed for branch ${profile.branchName}: ${err.message}`);
      }

    } else {
      // Large branch with no usable embedding: skip orphan detection and say so.
      summary.orphanSkipped = true;
      const cause = hasEmbed ? "no branch embedding available" : "no embed extension";
      degraded.push(
        `Orphan detection skipped for "${profile.branchName}" (${childNodeIds.length} nodes, ${cause})`
      );

      // Coherence from the similarity matrix only (less precise): a branch that
      // is very similar to the others is treated as broad, hence less coherent.
      const brIndex = branchIds.indexOf(brId);
      if (brIndex >= 0) {
        let simSum = 0;
        let simCount = 0;
        for (let j = 0; j < branchIds.length; j++) {
          if (j === brIndex) continue;
          simSum += matrix[brIndex][j];
          simCount++;
        }
        const avgSim = simCount > 0 ? simSum / simCount : 0;
        summary.coherence = round2(1 - avgSim);
      }
    }
  }

  // Overall coherence: branch coherence weighted by node count.
  let weightedSum = 0;
  let totalWeight = 0;
  for (const summary of Object.values(branchSummaries)) {
    weightedSum += summary.coherence * summary.nodeCount;
    totalWeight += summary.nodeCount;
  }
  const overallCoherence = totalWeight > 0 ? round2(weightedSum / totalWeight) : 1.0;

  // Sort by severity, high first. `??` rather than `||`: the rank of "high" is
  // 0, which `||` would replace with the fallback and sort after "moderate".
  const severityOrder = { high: 0, moderate: 1, low: 2 };
  findings.sort((a, b) => (severityOrder[a.severity] ?? 2) - (severityOrder[b.severity] ?? 2));

  return {
    findings: findings.slice(0, MAX_FINDINGS),
    branches: branchSummaries,
    overallCoherence,
    degraded,
  };
}
626
627// ─────────────────────────────────────────────────────────────────────────
628// STAGE 4: ANALYZE (full tree)
629// ─────────────────────────────────────────────────────────────────────────
630
/**
 * Stage 4: run structural cohesion analysis on an entire tree and store the
 * report under the "boundary" metadata namespace of the root node.
 *
 * The same namespace holds the operator-set flags `autoBoundary` and `paused`
 * that the scheduled job reads, so those two keys are carried over from the
 * existing metadata when the report is written.
 *
 * @param {string} rootId - id of the tree root
 * @param {string} userId - user charged for and credited with the analysis
 * @param {string} username
 * @returns {Promise<object>} the report: lastAnalysis, stale, branches,
 *   findings, overallCoherence, analyzedBy, branchCount, nodeCount,
 *   usedEmbeddings and, when present, degraded
 * @throws {Error} when the root does not exist, the node has no rootOwner,
 *   or the tree has no branches to analyze
 */
export async function analyze(rootId, userId, username) {
  await useEnergy({ userId, action: "boundaryAnalyze" });

  const root = await Node.findById(rootId).select("_id name rootOwner").lean();
  if (!root) throw new Error("Tree root not found");
  if (!root.rootOwner) throw new Error("Node is not a tree root");

  // Stage 1: build branch profiles
  log.verbose("Boundary", `Analyzing tree ${root.name} (${rootId})`);
  const { profiles, allNodes } = await buildBranchProfiles(rootId, userId, username);

  if (profiles.size === 0) {
    throw new Error("Tree has no branches to analyze");
  }

  // Stage 2: build similarity matrix
  const { matrix, branchIds, embeddings } = await buildSimilarityMatrix(
    profiles, userId, username, rootId
  );

  // Stage 3: detect patterns
  const { findings, branches, overallCoherence, degraded } = await detectPatterns(
    profiles, matrix, branchIds, allNodes, userId, username, rootId, embeddings
  );

  const report = {
    lastAnalysis: new Date().toISOString(),
    stale: false,
    branches,
    findings,
    overallCoherence,
    analyzedBy: userId,
    branchCount: profiles.size,
    nodeCount: allNodes.length,
    usedEmbeddings: !!embeddings,
    degraded: degraded.length > 0 ? degraded : undefined,
  };

  // Write to root metadata. setExtMeta presumably replaces the whole namespace
  // (mergeExtMeta is what markStale uses for partial updates) -- TODO confirm.
  // Carrying the flags over explicitly is harmless either way and keeps
  // auto-analysis enabled after the first report is stored.
  const rootDoc = await Node.findById(rootId);
  if (rootDoc) {
    const previous = _metadata.getExtMeta(rootDoc, "boundary") || {};
    const preserved = {};
    for (const flag of ["autoBoundary", "paused"]) {
      if (previous[flag] !== undefined) preserved[flag] = previous[flag];
    }
    await _metadata.setExtMeta(rootDoc, "boundary", { ...preserved, ...report });
  }

  await logContribution({
    userId,
    nodeId: rootId,
    wasAi: true,
    action: "boundary:analyzed",
    extensionData: {
      boundary: {
        overallCoherence,
        findingsCount: findings.length,
        branchCount: profiles.size,
      },
    },
  });

  log.info(
    "Boundary",
    `Analysis complete for ${root.name}: coherence ${overallCoherence}, ` +
    `${findings.length} finding(s) across ${profiles.size} branch(es)`
  );

  return report;
}
703
704// ─────────────────────────────────────────────────────────────────────────
705// STAGE 5: ANALYZE BRANCH (subtree variant)
706// ─────────────────────────────────────────────────────────────────────────
707
/**
 * Stage 5: run cohesion analysis scoped to a subtree.
 *
 * Treats the given node as the analysis root and its children as branches,
 * then stores the report under the "boundary" metadata namespace of that
 * node (not the tree root). Any `autoBoundary` / `paused` flags already stored
 * in that namespace are carried over so writing the report does not drop them.
 *
 * @param {string} nodeId - node to analyze from
 * @param {string} userId - user charged for and credited with the analysis
 * @param {string} username
 * @returns {Promise<object>} the report; same shape as the full-tree report
 *   plus `subtreeOf: nodeId`
 * @throws {Error} when the node does not exist, its tree root cannot be
 *   resolved, or the subtree has no nodes or no branches to analyze
 */
export async function analyzeBranch(nodeId, userId, username) {
  await useEnergy({ userId, action: "boundaryBranchScan" });

  const node = await Node.findById(nodeId).select("_id name rootOwner children").lean();
  if (!node) throw new Error("Node not found");

  // Resolve the tree root for context. A node with rootOwner is itself a root.
  let rootId;
  if (node.rootOwner) {
    rootId = nodeId;
  } else {
    const { resolveRootNode } = await import("../../seed/tree/treeFetch.js");
    const root = await resolveRootNode(nodeId);
    rootId = root?._id?.toString();
  }
  if (!rootId) throw new Error("Could not resolve tree root");

  log.verbose("Boundary", `Analyzing subtree from ${node.name} (${nodeId})`);

  // Load the whole tree, then narrow it down to this node's descendants.
  const allNodes = await Node.find({
    rootOwner: rootId,
    status: { $ne: "trimmed" },
    systemRole: { $eq: null },
  })
    .select("_id name parent children metadata")
    .lean();

  const nodeMap = new Map();
  for (const n of allNodes) nodeMap.set(n._id.toString(), n);

  const descendantIds = new Set(collectDescendants(nodeId, nodeMap));
  const subtreeNodes = allNodes.filter((n) => descendantIds.has(n._id.toString()));

  if (subtreeNodes.length === 0) {
    throw new Error("Subtree has no nodes to analyze");
  }

  // NOTE(review): buildBranchProfiles receives nodeId (not rootId) and uses it
  // for its own rootOwner query, unlike the query above -- confirm that profile
  // building finds nodes when nodeId is not a tree root.
  const { profiles } = await buildBranchProfiles(nodeId, userId, username);

  if (profiles.size === 0) {
    throw new Error("Subtree has no branches to analyze");
  }

  const { matrix, branchIds, embeddings } = await buildSimilarityMatrix(
    profiles, userId, username, rootId
  );

  const { findings, branches, overallCoherence, degraded } = await detectPatterns(
    profiles, matrix, branchIds, subtreeNodes, userId, username, rootId, embeddings
  );

  const report = {
    lastAnalysis: new Date().toISOString(),
    stale: false,
    branches,
    findings,
    overallCoherence,
    analyzedBy: userId,
    branchCount: profiles.size,
    nodeCount: subtreeNodes.length,
    usedEmbeddings: !!embeddings,
    degraded: degraded.length > 0 ? degraded : undefined,
    subtreeOf: nodeId,
  };

  // Write to this node's metadata (not the tree root). setExtMeta presumably
  // replaces the whole namespace -- TODO confirm; the operator flags are
  // carried over explicitly so they survive either way.
  const nodeDoc = await Node.findById(nodeId);
  if (nodeDoc) {
    const previous = _metadata.getExtMeta(nodeDoc, "boundary") || {};
    const preserved = {};
    for (const flag of ["autoBoundary", "paused"]) {
      if (previous[flag] !== undefined) preserved[flag] = previous[flag];
    }
    await _metadata.setExtMeta(nodeDoc, "boundary", { ...preserved, ...report });
  }

  await logContribution({
    userId,
    nodeId,
    wasAi: true,
    action: "boundary:branch-analyzed",
    extensionData: {
      boundary: {
        overallCoherence,
        findingsCount: findings.length,
        branchCount: profiles.size,
      },
    },
  });

  log.verbose(
    "Boundary",
    `Branch analysis complete for ${node.name}: coherence ${overallCoherence}, ` +
    `${findings.length} finding(s)`
  );

  return report;
}
810
811// ─────────────────────────────────────────────────────────────────────────
812// HELPERS (exported for other extensions)
813// ─────────────────────────────────────────────────────────────────────────
814
/**
 * Fetch the stored boundary report for a node.
 *
 * @param {string} rootId - id of the node whose metadata is read
 * @returns {Promise<object|null>} the "boundary" extension metadata, or null
 *   when the node does not exist or has no such metadata
 */
export async function getBoundaryReport(rootId) {
  const rootNode = await Node.findById(rootId).select("metadata").lean();
  return rootNode ? _metadata.getExtMeta(rootNode, "boundary") || null : null;
}
823
/**
 * Flag an existing boundary report as stale while keeping its contents.
 * Does nothing when the node is missing, has never been analyzed, or is
 * already marked stale. Called by the afterNote hook when content changes.
 *
 * @param {string} rootId - id of the node holding the report
 * @returns {Promise<void>}
 */
export async function markStale(rootId) {
  const rootDoc = await Node.findById(rootId);
  if (!rootDoc) return;

  const current = _metadata.getExtMeta(rootDoc, "boundary");
  const hasReport = Boolean(current && current.lastAnalysis);
  const needsMarking = hasReport && !current.stale;

  if (needsMarking) {
    await _metadata.mergeExtMeta(rootDoc, "boundary", { stale: true });
  }
}
838
/**
 * List the orphaned-node findings of the stored boundary report, formatted
 * for reroot consumption.
 *
 * @param {string} rootId - id of the node holding the report
 * @returns {Promise<Array<{ nodeId: string, description: string, currentBranch: string|null, suggestion: string }>>}
 *   one entry per "orphaned" finding that names at least one node; nodeId is
 *   the finding's first node and currentBranch its first branch (null when
 *   absent). Empty when there is no report or it has no findings.
 */
export async function getOrphanedNodes(rootId) {
  const report = await getBoundaryReport(rootId);
  const findings = report?.findings;
  if (!findings) return [];

  const orphans = [];
  for (const finding of findings) {
    if (finding.type !== "orphaned") continue;
    if (!(finding.nodes?.length > 0)) continue;

    orphans.push({
      nodeId: finding.nodes[0],
      description: finding.description,
      currentBranch: finding.branches?.[0] || null,
      suggestion: finding.suggestion,
    });
  }
  return orphans;
}
856
1import log from "../../seed/log.js";
2import tools from "./tools.js";
3import {
4 setServices,
5 analyze,
6 analyzeBranch,
7 getBoundaryReport,
8 getOrphanedNodes,
9 markStale,
10} from "./core.js";
11
/**
 * Extension entry point for "boundary".
 *
 * Wires the core services into ./core.js, registers the hooks that keep the
 * stored boundary report honest (afterNote, afterNodeMove) and the hook that
 * surfaces findings to the AI (enrichContext), then hands the router, tools,
 * background job and public exports back to the loader.
 *
 * @param {object} core - Service container supplied by the extension loader.
 *   This function reads core.llm, core.models, core.contributions,
 *   core.energy (optional), core.metadata and core.hooks.
 * @returns {Promise<{router: object, tools: Array, jobs: Array, exports: object}>}
 */
export async function init(core) {
  const BG = core.llm.LLM_PRIORITY.BACKGROUND;

  core.llm.registerRootLlmSlot("boundary");

  setServices({
    models: core.models,
    contributions: core.contributions,
    llm: {
      ...core.llm,
      // Every chat issued by boundary is forced to background priority. A
      // non-SYSTEM user without an LLM gets an empty answer instead of a call.
      runChat: async (opts) => {
        if (opts.userId && opts.userId !== "SYSTEM" && !(await core.llm.userHasLlm(opts.userId))) {
          return { answer: null };
        }
        return core.llm.runChat({ ...opts, llmPriority: BG });
      },
    },
    energy: core.energy || null,
    metadata: core.metadata,
  });

  // Resolve the id (as a string) of the tree root above a node, or null.
  const resolveRootId = async (nodeId) => {
    const { resolveRootNode } = await import("../../seed/tree/treeFetch.js");
    const root = await resolveRootNode(nodeId);
    return root?._id ? root._id.toString() : null;
  };

  // Fire-and-forget stale marking: hooks must not wait on it, but a failure
  // is still recorded at debug level instead of vanishing.
  const markStaleInBackground = (rootId) => {
    markStale(rootId).catch((err) => {
      log.debug("Boundary", "Failed to mark stale:", err?.message);
    });
  };

  // ── afterNote: mark boundary analysis as stale ──────────────────────
  core.hooks.register("afterNote", async ({ nodeId, userId, action }) => {
    if (action !== "create" && action !== "edit") return;
    if (!userId || userId === "SYSTEM") return;

    // Skip system nodes
    try {
      const node = await core.models.Node.findById(nodeId).select("systemRole").lean();
      if (node?.systemRole) return;
    } catch (err) {
      log.debug("Boundary", "Failed to load node after note:", err?.message);
      return;
    }

    // Find the tree root and mark its boundary analysis as stale
    try {
      const rootId = await resolveRootId(nodeId);
      if (rootId) markStaleInBackground(rootId);
    } catch (err) {
      log.debug("Boundary", "Failed to mark stale after note:", err.message);
    }
  }, "boundary");

  // ── afterNodeMove: branch membership changed, both trees stale ────────
  core.hooks.register("afterNodeMove", async ({ nodeId, oldParentId }) => {
    try {
      // New tree (resolves from the moved node's current position)
      const newRootId = await resolveRootId(nodeId);
      if (newRootId) markStaleInBackground(newRootId);

      // Without an old parent there is no previous tree to look up.
      if (!oldParentId) return;

      // Old tree (resolves from the old parent, which still lives there)
      const oldRootId = await resolveRootId(oldParentId);
      if (oldRootId && oldRootId !== newRootId) markStaleInBackground(oldRootId);
    } catch (err) {
      log.debug("Boundary", "Failed to mark stale after node move:", err?.message);
    }
  }, "boundary");

  // ── enrichContext: inject boundary findings ──────────────────────────
  core.hooks.register("enrichContext", async ({ context, node, meta }) => {
    const boundary = meta?.boundary;
    if (!boundary || !boundary.findings) return;

    // A malformed (non-array) findings value is treated as "no findings".
    const activeFindings = Array.isArray(boundary.findings) ? boundary.findings : [];
    if (activeFindings.length === 0 && boundary.overallCoherence == null) return;

    // Only inject findings relevant to the current node's branch
    const nodeId = node?._id?.toString();
    const relevant = activeFindings.filter((f) =>
      (f.nodes && f.nodes.includes(nodeId)) ||
      (f.branches && f.branches.includes(nodeId))
    );

    if (relevant.length > 0) {
      context.boundaryIssues = relevant.map((f) => ({
        type: f.type,
        severity: f.severity,
        description: f.description,
        suggestion: f.suggestion,
      }));
    }

    // Always include overall coherence
    if (boundary.overallCoherence != null) {
      context.treeCoherence = boundary.overallCoherence;
      if (boundary.stale) {
        context.treeCoherenceStale = true;
      }
    }
  }, "boundary");

  // ── Jobs ────────────────────────────────────────────────────────────
  const {
    setModels: setJobModels,
    setMetadata: setJobMetadata,
    startBoundaryJob,
    stopBoundaryJob,
  } = await import("./boundaryJob.js");
  setJobModels(core.models);
  setJobMetadata(core.metadata);

  // ── Routes ──────────────────────────────────────────────────────────
  const { default: router } = await import("./routes.js");

  log.info("Boundary", "Structural cohesion analysis loaded");

  return {
    router,
    tools,
    jobs: [
      {
        name: "boundary-cycle",
        start: () => startBoundaryJob(),
        stop: () => stopBoundaryJob(),
      },
    ],
    exports: {
      analyze,
      analyzeBranch,
      getBoundaryReport,
      getOrphanedNodes,
    },
  };
}
127
// Extension manifest for "boundary": declares what the extension needs from
// the host, what it optionally integrates with, and what it provides
// (routes, tools, jobs, energy costs, hooks, CLI commands).
export default {
  name: "boundary",
  version: "1.0.1",
  builtFor: "treeos-intelligence",
  description:
    "The tree knows where one concern ends and another begins. Branches grow " +
    "organically. Over time the edges blur. Authentication code creeps into the " +
    "API branch. Nutrition notes scatter across fitness, cooking, and health. " +
    "A project plan fragment sits orphaned under a personal journal entry. The " +
    "tree holds all of it but nobody mapped the zones. " +
    "\n\n" +
    "Boundary does structural cohesion analysis. For a given tree or subtree, it " +
    "extracts the topic of each branch by examining node names, note content, " +
    "compress essences, and codebook relationships. Then it builds a similarity " +
    "matrix comparing all sibling branches pairwise. If the embed extension is " +
    "installed, similarity uses vector cosine distance. Otherwise it falls back " +
    "to LLM-based semantic comparison. " +
    "\n\n" +
    "Three patterns detected. Blurred boundaries: two sibling branches overlap " +
    "significantly on the same topic. The user created both but didn't realize " +
    "they're doing the same work in two places. Fragmented concepts: the same " +
    "topic appears in three or more disconnected branches. Nobody consolidated. " +
    "Orphaned nodes: individual nodes whose content is semantically distant from " +
    "their parent branch's topic. A node about SSL certificates sitting under " +
    "Marketing. It ended up there by accident and nobody moved it. " +
    "\n\n" +
    "Orphan detection degrades gracefully based on available infrastructure. " +
    "With embed installed, per-node analysis runs on any branch size via cosine " +
    "math. Without embed on small branches (20 nodes or fewer), LLM batch queries " +
    "handle it. Without embed on large branches, per-node orphan detection is " +
    "skipped entirely. The report notes the gap and suggests installing embed. " +
    "Branch-level patterns (blurred, fragmented) always run regardless. " +
    "\n\n" +
    "Each branch gets a coherence score from 0 to 1. How tightly focused is the " +
    "content? Low coherence means the branch mixes multiple concerns. High means " +
    "every node in the branch is about the same thing. The tree root gets an " +
    "overall coherence score. " +
    "\n\n" +
    "Findings feed directly into reroot. Orphaned nodes become move candidates. " +
    "Fragmented concepts become consolidation targets. Blurred boundaries become " +
    "merge suggestions. Boundary tells reroot WHY to move things instead of " +
    "asking the AI to guess. Evidence-based reorganization instead of vibes. " +
    "\n\n" +
    "enrichContext injects boundary findings so the AI knows about structural " +
    "issues at the current position. afterNote marks the analysis as stale when " +
    "content changes. The analysis is on-demand or scheduled through the " +
    "optional background job. Not at boot. Trees don't need boundary analysis " +
    "every time the land restarts.",

  needs: {
    services: ["llm", "hooks", "contributions"],
    models: ["Node", "Note"],
  },

  optional: {
    services: ["energy"],
    extensions: ["embed", "reroot", "evolution", "codebook", "tree-compress"],
  },

  provides: {
    models: {},
    routes: "./routes.js",
    tools: true,
    jobs: true,
    orchestrator: false,
    energyActions: {
      boundaryAnalyze: { cost: 3 },
      boundaryBranchScan: { cost: 1 },
    },
    sessionTypes: {},

    hooks: {
      fires: [],
      // Must list every hook init() registers; afterNodeMove marks both the
      // source and destination trees stale when a node changes branch.
      listens: ["enrichContext", "afterNote", "afterNodeMove"],
    },

    cli: [
      {
        command: "boundary [action]", scope: ["tree"],
        description:
          "Structural cohesion analysis. Actions: status, analyze, branch.",
        method: "GET",
        endpoint: "/root/:rootId/boundary",
        subcommands: {
          status: {
            method: "GET",
            endpoint: "/root/:rootId/boundary",
            description: "Last analysis results and coherence scores",
          },
          // Advertised in the command description and backed by the
          // POST /root/:rootId/boundary/analyze route.
          analyze: {
            method: "POST",
            endpoint: "/root/:rootId/boundary/analyze",
            description: "Run a full analysis of the whole tree",
          },
          branch: {
            method: "POST",
            endpoint: "/node/:nodeId/boundary/analyze",
            description: "Analyze from current node down (subtree only)",
          },
        },
      },
    ],
  },
};
100
1import express from "express";
2import { sendOk, sendError, ERR } from "../../seed/protocol.js";
3import authenticate from "../../seed/middleware/authenticate.js";
4import { analyze, analyzeBranch, getBoundaryReport } from "./core.js";
5
/**
 * Pull the rootId route parameter off the request and check it is usable.
 *
 * A missing value, or the literal strings "undefined" / "null", is rejected:
 * a 400 INVALID_INPUT response is sent and null is returned so the caller can
 * stop immediately.
 *
 * @param {object} req - Request; req.params.rootId is read.
 * @param {object} res - Response, only used to send the 400 on rejection.
 * @returns {string|null} The rootId, or null when a 400 has already been sent.
 */
function validateRootId(req, res) {
  const { rootId } = req.params;
  const isUsable = Boolean(rootId) && rootId !== "undefined" && rootId !== "null";

  if (isUsable) return rootId;

  sendError(res, 400, ERR.INVALID_INPUT, "rootId is required");
  return null;
}
14
const router = express.Router();

/**
 * GET /root/:rootId/boundary
 *
 * Responds with the stored boundary report for a tree together with its
 * stale flag and last-analysis value. Only reads via getBoundaryReport.
 * Any thrown error is answered with 500 INTERNAL.
 */
async function handleBoundaryStatus(req, res) {
  try {
    const rootId = validateRootId(req, res);
    if (rootId) {
      const report = await getBoundaryReport(rootId);
      const payload = {
        rootId,
        report: report || null,
        stale: Boolean(report?.stale),
        lastAnalysis: report?.lastAnalysis || null,
      };
      sendOk(res, payload);
    }
  } catch (err) {
    sendError(res, 500, ERR.INTERNAL, err.message);
  }
}

router.get("/root/:rootId/boundary", authenticate, handleBoundaryStatus);
33
/**
 * POST /root/:rootId/boundary/analyze
 *
 * Runs a full-tree analysis on behalf of the authenticated caller and sends
 * back whatever analyze() returns. Any thrown error is answered with
 * 400 INVALID_INPUT carrying the error message.
 */
async function handleTreeAnalyze(req, res) {
  try {
    const rootId = validateRootId(req, res);
    if (rootId) {
      const { userId, username } = req;
      const outcome = await analyze(rootId, userId, username);
      sendOk(res, outcome);
    }
  } catch (err) {
    sendError(res, 400, ERR.INVALID_INPUT, err.message);
  }
}

router.post("/root/:rootId/boundary/analyze", authenticate, handleTreeAnalyze);
45
/**
 * POST /node/:nodeId/boundary/analyze
 *
 * Runs a subtree analysis starting at the given node. A missing nodeId (or
 * the literal strings "undefined" / "null") is rejected with 400; any error
 * thrown during analysis is also answered with 400 INVALID_INPUT.
 */
async function handleBranchAnalyze(req, res) {
  try {
    const { nodeId } = req.params;
    const isMissing = !nodeId || nodeId === "undefined" || nodeId === "null";
    if (isMissing) {
      return sendError(res, 400, ERR.INVALID_INPUT, "nodeId is required");
    }

    const { userId, username } = req;
    const outcome = await analyzeBranch(nodeId, userId, username);
    sendOk(res, outcome);
  } catch (err) {
    sendError(res, 400, ERR.INVALID_INPUT, err.message);
  }
}

router.post("/node/:nodeId/boundary/analyze", authenticate, handleBranchAnalyze);

export default router;
61
1import { z } from "zod";
2import { analyze, analyzeBranch, getBoundaryReport } from "./core.js";
3
// Schema fields shared by every boundary tool. Their descriptions mark them
// as server-injected. Built fresh per tool so no zod instance is shared.
const injectedFields = () => ({
  userId: z.string().describe("Injected by server. Ignore."),
  chatId: z.string().nullable().optional().describe("Injected by server. Ignore."),
  sessionId: z.string().nullable().optional().describe("Injected by server. Ignore."),
});

// Wrap a plain string in the single-text-item result shape the tools return.
const textResult = (text) => ({ content: [{ type: "text", text }] });

// Pretty-print a payload as JSON inside a text result.
const jsonResult = (payload) => textResult(JSON.stringify(payload, null, 2));

// Look up the caller's username, falling back to "system" when not found.
async function lookupUsername(userId) {
  const { default: User } = await import("../../seed/models/user.js");
  const user = await User.findById(userId).select("username").lean();
  return user?.username || "system";
}

// Tool definitions exposed by the boundary extension:
//   boundary-analyze - full-tree analysis
//   boundary-branch  - subtree analysis from a node down
//   boundary-status  - read the last stored report
// Each handler reports failures as a text result rather than throwing.
export default [
  {
    name: "boundary-analyze",
    description:
      "Run structural cohesion analysis on the entire tree. Finds blurred boundaries " +
      "between overlapping branches, fragmented concepts spread across multiple branches, " +
      "and orphaned nodes that don't belong where they are. Results stored on root metadata.",
    schema: {
      rootId: z.string().describe("Tree root to analyze."),
      ...injectedFields(),
    },
    annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true, openWorldHint: true },
    handler: async ({ rootId, userId }) => {
      try {
        const username = await lookupUsername(userId);
        const result = await analyze(rootId, userId, username);
        return jsonResult({
          overallCoherence: result.overallCoherence,
          branchCount: result.branchCount,
          nodeCount: result.nodeCount,
          findingsCount: result.findings.length,
          usedEmbeddings: result.usedEmbeddings,
          degraded: result.degraded || [],
          findings: result.findings,
        });
      } catch (err) {
        return textResult(`Analysis failed: ${err.message}`);
      }
    },
  },
  {
    name: "boundary-branch",
    description:
      "Run cohesion analysis on a subtree from the current node down. " +
      "Lighter than full tree analysis. Good for checking a reorganized branch.",
    schema: {
      nodeId: z.string().describe("Node to analyze from (subtree root)."),
      ...injectedFields(),
    },
    annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: true, openWorldHint: true },
    handler: async ({ nodeId, userId }) => {
      try {
        const username = await lookupUsername(userId);
        const result = await analyzeBranch(nodeId, userId, username);
        return jsonResult({
          overallCoherence: result.overallCoherence,
          branchCount: result.branchCount,
          findingsCount: result.findings.length,
          usedEmbeddings: result.usedEmbeddings,
          degraded: result.degraded || [],
          findings: result.findings,
        });
      } catch (err) {
        return textResult(`Branch analysis failed: ${err.message}`);
      }
    },
  },
  {
    name: "boundary-status",
    description: "Show the last boundary analysis results. No LLM calls. Read-only.",
    schema: {
      rootId: z.string().describe("Tree root to check."),
      ...injectedFields(),
    },
    annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false },
    handler: async ({ rootId }) => {
      try {
        const report = await getBoundaryReport(rootId);
        const hasReport = Boolean(report && report.lastAnalysis);
        if (!hasReport) {
          return textResult(
            "No boundary analysis has been run on this tree yet. Use boundary-analyze to run one.",
          );
        }
        return jsonResult({
          lastAnalysis: report.lastAnalysis,
          stale: !!report.stale,
          overallCoherence: report.overallCoherence,
          branchCount: report.branchCount,
          nodeCount: report.nodeCount,
          usedEmbeddings: report.usedEmbeddings,
          degraded: report.degraded || [],
          findings: report.findings,
        });
      } catch (err) {
        return textResult(`Status check failed: ${err.message}`);
      }
    },
  },
];
119
Loading comments...