1/**
2 * Embed Core
3 *
4 * Vector embeddings for notes. Two storage modes:
5 * - Internal: vectors in MongoDB on the Note document metadata
6 * - External: vectors in a dedicated vector store (Pinecone, Qdrant, etc.)
7 *
 * Cosine similarity search, scoped to the origin node's neighborhood by
 * default; pass searchAll to scan the full tree.
9 */
10
11import log from "../../seed/log.js";
12import Node from "../../seed/models/node.js";
13import Note from "../../seed/models/note.js";
14import { SYSTEM_ROLE, CONTENT_TYPE } from "../../seed/protocol.js";
15import { getDescendantIds } from "../../seed/tree/treeFetch.js";
16
// Resolver handed in by the host via setServices(); null until then.
let _getClientForUser = null;

/**
 * Wire in the host-provided services this module depends on.
 *
 * Only `getClientForUser` is read; it is later called by generateEmbedding
 * as `(userId, slotName)` and expected to resolve to `{ client, model }`.
 *
 * @param {{ getClientForUser: Function }} services
 */
export function setServices(services) {
  ({ getClientForUser: _getClientForUser } = services);
}
21
22// ─────────────────────────────────────────────────────────────────────────
23// CONFIG
24// ─────────────────────────────────────────────────────────────────────────
25
/**
 * Fallback embed settings, used when the config node has no `embed`
 * metadata or omits a key.
 *
 * Frozen so a stray write cannot silently change the defaults for every
 * later caller; readers in this file always spread-copy it.
 */
const DEFAULTS = Object.freeze({
  // LLM slot name handed to the client resolver; null means use "main".
  embeddingModel: null,
  // Declared vector width. Not read by any code visible in this file.
  embeddingDimensions: 1536,
  // Minimum cosine similarity for a note to count as related.
  similarityThreshold: 0.75,
  // Default cap on the number of results findSimilar returns.
  maxRelatedNotes: 10,
  // Storage mode. Not read by any code visible in this file.
  vectorStore: "internal",
  // Text is cut to this many characters before it is embedded.
  maxContentChars: 8000,
});
34
/**
 * Load the effective embed configuration.
 *
 * Reads the `embed` entry from the metadata of the node whose systemRole is
 * SYSTEM_ROLE.CONFIG and layers it over DEFAULTS. Metadata may arrive as a
 * Map or as a plain object; both shapes are handled.
 *
 * @returns {Promise<object>} a fresh object: DEFAULTS overlaid with any stored overrides
 */
export async function getEmbedConfig() {
  const configNode = await Node.findOne({ systemRole: SYSTEM_ROLE.CONFIG })
    .select("metadata")
    .lean();
  if (!configNode) {
    return { ...DEFAULTS };
  }

  const { metadata } = configNode;
  let overrides;
  if (metadata instanceof Map) {
    overrides = metadata.get("embed") || {};
  } else {
    overrides = metadata?.embed || {};
  }

  return Object.assign({}, DEFAULTS, overrides);
}
43
44// ─────────────────────────────────────────────────────────────────────────
45// EMBEDDING
46// ─────────────────────────────────────────────────────────────────────────
47
/**
 * Turn a piece of text into an embedding vector.
 *
 * The client is obtained from the resolver installed via setServices(),
 * using the configured `embeddingModel` slot or "main" when none is set.
 * The request goes through `client.embeddings.create` (OpenAI-style shape).
 *
 * @param {string} text - content to embed; cut to `maxContentChars` first
 * @param {string} userId - passed to the client resolver
 * @returns {Promise<number[]|null>} the vector, or null for blank text or
 *   when the endpoint reports itself as missing (404 / "not found")
 * @throws {Error} when no resolver or client is available, when the response
 *   carries no embedding, or on any other request failure
 */
export async function generateEmbedding(text, userId) {
  if (!_getClientForUser) {
    throw new Error("LLM service not available");
  }
  const isBlank = !text || text.trim().length === 0;
  if (isBlank) return null;

  const settings = await getEmbedConfig();
  const input = text.slice(0, settings.maxContentChars);

  const slot = settings.embeddingModel || "main";
  const { client, model } = await _getClientForUser(userId, slot);
  if (!client) {
    throw new Error("No LLM connection available for embedding");
  }

  let response;
  try {
    response = await client.embeddings.create({
      model: model || "text-embedding-3-small",
      input,
    });
  } catch (err) {
    // Endpoints without embeddings support are expected: log quietly and
    // report "no vector". Everything else is a real failure.
    const endpointMissing = err.status === 404 || err.message?.includes("not found");
    if (!endpointMissing) throw err;
    log.debug("Embed", `Embedding endpoint not available: ${err.message}`);
    return null;
  }

  const embedding = response?.data?.[0]?.embedding;
  if (!embedding) {
    throw new Error("Embedding response missing data");
  }
  return embedding;
}
84
/**
 * Persist an embedding on a note.
 *
 * Writes the vector and an ISO-8601 timestamp under the note's
 * `metadata.embed` via a single `$set` update.
 *
 * @param {*} noteId - id of the note to update
 * @param {number[]} vector - embedding to store
 * @returns {Promise<void>}
 */
export async function storeVector(noteId, vector) {
  const embeddedAt = new Date().toISOString();
  const update = {
    $set: {
      "metadata.embed.vector": vector,
      "metadata.embed.embeddedAt": embeddedAt,
    },
  };
  await Note.findByIdAndUpdate(noteId, update);
}
96
/**
 * Embed one note and store the result on it.
 *
 * Only notes whose contentType is CONTENT_TYPE.TEXT and whose content is
 * non-empty are embedded.
 *
 * @param {*} noteId - id of the note to embed
 * @param {string} userId - forwarded to generateEmbedding
 * @returns {Promise<number[]|null>} the stored vector, or null when the note
 *   is missing, not embeddable, or no vector could be produced
 */
export async function embedNote(noteId, userId) {
  const note = await Note.findById(noteId).select("content contentType").lean();

  const isEmbeddable =
    Boolean(note) && note.contentType === CONTENT_TYPE.TEXT && Boolean(note.content);
  if (!isEmbeddable) return null;

  const vector = await generateEmbedding(note.content, userId);
  if (vector) {
    await storeVector(noteId, vector);
    return vector;
  }
  return null;
}
110
111// ─────────────────────────────────────────────────────────────────────────
112// SIMILARITY SEARCH
113// ─────────────────────────────────────────────────────────────────────────
114
/**
 * Cosine of the angle between two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either input is missing,
 *   the lengths differ, or either vector has zero magnitude
 */
function cosineSimilarity(a, b) {
  if (!a || !b) return 0;
  if (a.length !== b.length) return 0;

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let idx = 0, len = a.length; idx < len; idx += 1) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (magnitude === 0) return 0;
  return dotProduct / magnitude;
}
129
/**
 * Build the set of node IDs a similarity search should look at.
 *
 * With `searchAll` the scope is whatever getDescendantIds(rootId) returns.
 * Otherwise it is the union of:
 *   1. the subtree under the ancestor reached by walking up at most two
 *      levels from `nodeId`, never climbing above `rootId`, and
 *   2. the root's direct children plus each of their direct children, so
 *      top-level branches elsewhere in the tree stay discoverable.
 *
 * Scoping keeps the number of vectors loaded proportional to the local
 * neighborhood rather than the whole tree.
 *
 * @param {string} nodeId - the search origin
 * @param {string} rootId - the tree root
 * @param {boolean} searchAll - if true, search the entire tree
 * @returns {Promise<string[]>} node IDs to include in the search
 */
async function buildSearchScope(nodeId, rootId, searchAll) {
  if (searchAll) {
    return getDescendantIds(rootId);
  }

  const maxUp = 2;
  const rootKey = rootId == null ? null : String(rootId);
  const scopeIds = new Set();

  // 1. Walk up toward the grandparent. Stop at the tree root: climbing past
  //    it would widen the scope to whatever sits above this tree.
  let cursor = nodeId;
  for (let climbed = 0; cursor && climbed < maxUp; climbed++) {
    if (rootKey !== null && String(cursor) === rootKey) break;
    const n = await Node.findById(cursor).select("parent").lean();
    if (!n || !n.parent) break;
    cursor = n.parent.toString();
  }

  // cursor is now the scoping ancestor (grandparent, root, or as high as we got).
  // Ids are stringified so the Set dedupes regardless of the id type returned.
  const subtreeIds = await getDescendantIds(cursor);
  for (const id of subtreeIds) scopeIds.add(String(id));

  // 2. Top-level branches and one level below them, fetched with a single
  //    query instead of one lookup per branch.
  const root = await Node.findById(rootId).select("children").lean();
  const topLevel = root?.children ?? [];
  if (topLevel.length > 0) {
    for (const childId of topLevel) scopeIds.add(childId.toString());

    const branches = await Node.find({ _id: { $in: topLevel } })
      .select("children")
      .lean();
    for (const branch of branches) {
      for (const grandchild of branch.children ?? []) {
        scopeIds.add(grandchild.toString());
      }
    }
  }

  return [...scopeIds];
}
187
/**
 * Find notes semantically similar to a query vector.
 *
 * Only text notes that already carry `metadata.embed.vector` and live on a
 * node inside the search scope are loaded and scored. Results are sorted by
 * similarity (rounded to three decimals), capped, and annotated with the
 * owning node's name.
 *
 * @param {number[]} queryVector
 * @param {string} rootId - tree root
 * @param {object} [opts]
 * @param {number} [opts.threshold] - minimum similarity; 0 is honored.
 *   Defaults to the configured `similarityThreshold`.
 * @param {number} [opts.maxResults] - result cap; a falsy value falls back
 *   to the configured `maxRelatedNotes`.
 * @param {Iterable} [opts.excludeNoteIds] - note ids to leave out, matched
 *   by string value
 * @param {string} [opts.nodeId] - search origin; defaults to `rootId`
 * @param {boolean} [opts.searchAll] - search the whole tree
 * @returns {Promise<Array<{ noteId, nodeId, nodeName, similarity, snippet }>>}
 */
export async function findSimilar(queryVector, rootId, opts = {}) {
  const config = await getEmbedConfig();
  // `??` so an explicit threshold of 0 ("accept everything") is not discarded.
  const threshold = opts.threshold ?? config.similarityThreshold;
  const maxResults = opts.maxResults || config.maxRelatedNotes;
  // Ids are compared as strings: two id objects for the same document are
  // distinct references, so a Set of raw id objects would never match.
  const excludeIds = new Set(Array.from(opts.excludeNoteIds ?? [], String));

  // Build the scoped node set
  const nodeIds = await buildSearchScope(opts.nodeId || rootId, rootId, opts.searchAll || false);

  // Load embedded notes only from scoped nodes
  const notes = await Note.find({
    nodeId: { $in: nodeIds },
    contentType: CONTENT_TYPE.TEXT,
    "metadata.embed.vector": { $exists: true },
  })
    .select("_id nodeId content metadata")
    .lean();

  // Score each note
  const scored = [];
  for (const note of notes) {
    if (excludeIds.has(String(note._id))) continue;

    const noteVector = note.metadata instanceof Map
      ? note.metadata.get("embed")?.vector
      : note.metadata?.embed?.vector;
    if (!noteVector) continue;

    const similarity = cosineSimilarity(queryVector, noteVector);
    if (similarity >= threshold) {
      scored.push({
        noteId: note._id,
        nodeId: note.nodeId,
        similarity: Math.round(similarity * 1000) / 1000,
        // Content can be absent on a stored note; never let that abort the search.
        snippet: typeof note.content === "string" ? note.content.slice(0, 200) : "",
      });
    }
  }

  // Sort by similarity descending, cap results
  scored.sort((a, b) => b.similarity - a.similarity);
  const results = scored.slice(0, maxResults);
  if (results.length === 0) return results;

  // Resolve node names with one query for all results.
  const wantedNodeIds = [
    ...new Set(results.filter((r) => r.nodeId != null).map((r) => String(r.nodeId))),
  ];
  const nodes = wantedNodeIds.length > 0
    ? await Node.find({ _id: { $in: wantedNodeIds } }).select("name").lean()
    : [];
  const nameById = new Map(nodes.map((n) => [String(n._id), n.name]));
  for (const r of results) {
    r.nodeName = nameById.get(String(r.nodeId)) || r.nodeId;
  }

  return results;
}
249
/**
 * Find notes related to what is written at a node.
 *
 * Uses the newest text note at the node as the query: its stored vector if
 * it has one, otherwise a freshly generated embedding (which is not stored).
 * The search is scoped to the node's neighborhood unless `searchAll` is set.
 *
 * @param {string} nodeId - node whose latest text note drives the search
 * @param {string} userId - used if an embedding has to be generated
 * @param {string} [rootId] - tree root; resolved from `nodeId` when omitted
 * @param {boolean} [searchAll=false] - search the whole tree
 * @returns {Promise<Array>} findSimilar results, or [] when there is no
 *   usable note, no vector, or no resolvable root
 */
export async function findRelatedAtNode(nodeId, userId, rootId, searchAll = false) {
  // Newest text note at this node
  const latestNote = await Note.findOne({ nodeId, contentType: CONTENT_TYPE.TEXT })
    .sort({ createdAt: -1 })
    .select("_id content metadata")
    .lean();
  if (!latestNote?.content) return [];

  // Prefer the vector already on the note; embed on the fly otherwise.
  const { metadata } = latestNote;
  const storedVector = metadata instanceof Map
    ? metadata.get("embed")?.vector
    : metadata?.embed?.vector;
  const queryVector = storedVector || (await generateEmbedding(latestNote.content, userId));
  if (!queryVector) return [];

  // Resolve the tree root when the caller did not supply one.
  let treeRootId = rootId;
  if (!treeRootId) {
    try {
      const { resolveRootNode } = await import("../../seed/tree/treeFetch.js");
      const resolved = await resolveRootNode(nodeId);
      treeRootId = resolved?._id;
    } catch (err) {
      log.debug("Embed", "Root resolution failed:", err.message);
    }
  }
  if (!treeRootId) return [];

  const searchOptions = {
    excludeNoteIds: [latestNote._id],
    nodeId,
    searchAll,
  };
  return findSimilar(queryVector, treeRootId, searchOptions);
}
296
297// ─────────────────────────────────────────────────────────────────────────
298// STATUS
299// ─────────────────────────────────────────────────────────────────────────
300
/**
 * Report embedding coverage over all text notes.
 *
 * The two counts are independent, so they are issued together rather than
 * one after the other.
 *
 * @returns {Promise<{ totalTextNotes: number, embeddedNotes: number, coveragePercent: number }>}
 *   `coveragePercent` is rounded to one decimal place and is 0 when there
 *   are no text notes.
 */
export async function getEmbedStatus() {
  const textNotes = { contentType: CONTENT_TYPE.TEXT };
  const [totalNotes, embeddedNotes] = await Promise.all([
    Note.countDocuments(textNotes),
    Note.countDocuments({ ...textNotes, "metadata.embed.vector": { $exists: true } }),
  ]);

  const coverage = totalNotes > 0 ? Math.round((embeddedNotes / totalNotes) * 1000) / 10 : 0;

  return {
    totalTextNotes: totalNotes,
    embeddedNotes,
    coveragePercent: coverage,
  };
}
319
320// ─────────────────────────────────────────────────────────────────────────
321// REBUILD
322// ─────────────────────────────────────────────────────────────────────────
323
/**
 * Re-embed all text notes. For use after changing the embedding model.
 *
 * Notes are streamed from the database with a query cursor and embedded one
 * at a time, so memory use does not grow with the number of notes. A note
 * whose content is missing, non-string, or blank is skipped and counts as
 * neither embedded nor failed. A failure on one note is logged and counted;
 * it does not stop the rebuild.
 *
 * @param {string} userId - forwarded to generateEmbedding
 * @param {(progress: { embedded: number, failed: number, total: number }) => void} [onProgress]
 *   called each time the number of processed notes reaches a multiple of 50
 * @returns {Promise<{ embedded: number, failed: number, total: number }>}
 *   `total` is the number of text notes, including skipped ones
 */
export async function rebuildEmbeddings(userId, onProgress) {
  const textNotes = { contentType: CONTENT_TYPE.TEXT };
  const total = await Note.countDocuments(textNotes);

  let embedded = 0;
  let failed = 0;

  const cursor = Note.find(textNotes).select("_id content").lean().cursor();
  for await (const note of cursor) {
    if (typeof note.content !== "string" || note.content.trim().length === 0) continue;

    try {
      const vector = await generateEmbedding(note.content, userId);
      if (vector) {
        await storeVector(note._id, vector);
        embedded++;
      } else {
        // No vector and no error: the embedding endpoint is unavailable.
        failed++;
      }
    } catch (err) {
      failed++;
      log.debug("Embed", `Rebuild failed for note ${note._id}: ${err.message}`);
    }

    if (onProgress && (embedded + failed) % 50 === 0) {
      onProgress({ embedded, failed, total });
    }
  }

  log.verbose("Embed", `Rebuild complete: ${embedded} embedded, ${failed} failed out of ${total}`);
  return { embedded, failed, total };
}
360
1import log from "../../seed/log.js";
2import tools from "./tools.js";
3import { setServices, embedNote, findSimilar, getEmbedConfig } from "./core.js";
4import { CONTENT_TYPE } from "../../seed/protocol.js";
5
/**
 * Extension entry point.
 *
 * Hands the host's LLM client resolver to the core module, registers the
 * `afterNote` and `enrichContext` hooks, and returns the router, tools and
 * exported functions.
 *
 * @param {object} core - host services; this function reads `core.llm`,
 *   `core.hooks` and `core.models`
 * @returns {Promise<{ router: object, tools: Array, exports: object }>}
 */
export async function init(core) {
  setServices({
    getClientForUser: core.llm.getClientForUser,
  });

  // ── afterNote: embed every new or edited text note ─────────────────
  core.hooks.register("afterNote", async ({ note, nodeId, userId, contentType, action }) => {
    if (contentType !== CONTENT_TYPE.TEXT) return;
    if (action !== "create" && action !== "edit") return;
    if (!userId || userId === "SYSTEM") return;

    const noteId = note?._id || note?.id;
    if (!noteId) return;

    // Skip notes on system nodes. A failed lookup also skips, but is logged
    // rather than silently dropped.
    try {
      const owner = await core.models.Node.findById(nodeId).select("systemRole").lean();
      if (owner?.systemRole) return;
    } catch (err) {
      log.debug("Embed", `Node lookup failed for ${nodeId}, skipping embedding: ${err.message}`);
      return;
    }

    // Embed in background, don't block note write
    embedNote(noteId, userId).catch((err) => {
      log.debug("Embed", `Background embedding failed for note at ${nodeId}: ${err.message}`);
    });
  }, "embed");

  // ── enrichContext: inject semantically related notes ────────────────
  core.hooks.register("enrichContext", async ({ context, node, meta, userId }) => {
    if (!userId) return;
    if (node.systemRole) return;

    // Cheap gate before any query: proceed only when the hook payload
    // carries an `embed` entry in `meta`.
    // NOTE(review): nothing visible here writes that entry; confirm who
    // populates it, otherwise this hook never gets past this line.
    if (!meta?.embed) return;

    try {
      // Find the tree root
      let rootId;
      if (node.rootOwner) {
        rootId = node._id;
      } else {
        const { resolveRootNode } = await import("../../seed/tree/treeFetch.js");
        const root = await resolveRootNode(node._id);
        rootId = root?._id;
      }
      if (!rootId) return;

      // Get the most recent note's vector at this node
      const recentNote = await core.models.Note.findOne({
        nodeId: node._id,
        contentType: CONTENT_TYPE.TEXT,
        "metadata.embed.vector": { $exists: true },
      })
        .sort({ createdAt: -1 })
        .select("_id metadata")
        .lean();
      if (!recentNote) return;

      const vector = recentNote.metadata instanceof Map
        ? recentNote.metadata.get("embed")?.vector
        : recentNote.metadata?.embed?.vector;
      if (!vector) return;

      // No threshold passed: findSimilar falls back to the configured
      // similarityThreshold itself, so a second config read is unnecessary.
      const related = await findSimilar(vector, rootId, {
        maxResults: 5,
        excludeNoteIds: [recentNote._id],
        nodeId: node._id,
      });

      if (related.length > 0) {
        context.relatedNotes = related.map((r) => ({
          nodeName: r.nodeName,
          similarity: r.similarity,
          snippet: r.snippet,
        }));
      }
    } catch (err) {
      // Enrichment is best-effort; a failure must not break context building.
      log.debug("Embed", "Related notes enrichment failed:", err.message);
    }
  }, "embed");

  const { default: router } = await import("./routes.js");
  const { findRelatedAtNode, generateEmbedding } = await import("./core.js");

  return {
    router,
    tools,
    exports: {
      embedNote,
      findSimilar,
      findRelatedAtNode,
      generateEmbedding,
    },
  };
}
106
// Extension manifest: identity, requirements, and what the extension provides.
export default {
  name: "embed",
  version: "1.0.1",
  builtFor: "treeos-intelligence",
  // One long sentence-per-line description, joined without separators.
  description: [
    "Every note gets a vector embedding when written. The tree structure is the skeleton. The ",
    "embeddings are the magnetic field between bones. Two notes on opposite branches that are ",
    "semantically related find each other without any explicit link. The tree hierarchy says ",
    "these are far apart. The vector space says these mean the same thing. Three layers working ",
    "together: the tree is navigation (parent, children, position), the graph is explicit ",
    "connections (cascade, codebook, contributors), the vectors are implicit connections (nobody ",
    "linked these two notes, but they are about the same thing). enrichContext injects related ",
    "notes into the AI context. The AI at /Health/Fitness sees a semantically related note from ",
    "/Health/Food about protein timing without either branch explicitly referencing the other. ",
    "Per-viewer relevance when inverse-tree is installed. The tree holds structure. The vectors ",
    "hold meaning. Together the tree knows not just where things are but what things are like ",
    "each other. Navigation finds things by position. Embedding finds things by resonance.",
  ].join(""),

  // Hard requirements.
  needs: {
    services: ["llm"],
    models: ["Node", "Note"],
  },

  // Used when present, not required.
  optional: {
    extensions: ["inverse-tree"],
  },

  provides: {
    models: {},
    routes: "./routes.js",
    tools: true,
    jobs: true,
    orchestrator: false,
    energyActions: {},
    sessionTypes: {},
    env: [],

    // CLI commands and the HTTP endpoints they map to.
    cli: [
      {
        command: "related",
        scope: ["tree"],
        description: "Semantically similar notes at this position",
        method: "GET",
        endpoint: "/node/:nodeId/related",
      },
      {
        command: "embed [action]",
        description: "Embedding status and management. Actions: status, rebuild.",
        method: "GET",
        endpoint: "/embed/status",
        subcommands: {
          status: {
            method: "GET",
            endpoint: "/embed/status",
            description: "Embedding coverage percentage",
          },
          rebuild: {
            method: "POST",
            endpoint: "/embed/rebuild",
            description: "Re-embed all notes",
          },
        },
      },
    ],

    hooks: {
      fires: [],
      listens: ["afterNote", "enrichContext"],
    },
  },
};
63
1import express from "express";
2import authenticate from "../../seed/middleware/authenticate.js";
3import { sendOk, sendError, ERR } from "../../seed/protocol.js";
4import { findRelatedAtNode, getEmbedStatus } from "./core.js";
5
// HTTP routes for the embed extension. Every route requires authentication.
const router = express.Router();

// GET /node/:nodeId/related - semantically similar notes
// Query: all=true|1 searches the whole tree; rootId optionally names the root.
router.get("/node/:nodeId/related", authenticate, async (req, res) => {
  try {
    const searchAll = req.query.all === "true" || req.query.all === "1";
    const results = await findRelatedAtNode(
      req.params.nodeId,
      req.userId,
      req.query.rootId || null,
      searchAll,
    );
    sendOk(res, { count: results.length, results });
  } catch (err) {
    sendError(res, 500, ERR.INTERNAL, err.message);
  }
});

// GET /embed/status - coverage stats
router.get("/embed/status", authenticate, async (req, res) => {
  try {
    const status = await getEmbedStatus();
    sendOk(res, status);
  } catch (err) {
    sendError(res, 500, ERR.INTERNAL, err.message);
  }
});

// POST /embed/rebuild - re-embed all text notes
// Backs the `embed rebuild` CLI subcommand, which targets this endpoint.
// The response is sent only once the rebuild has finished.
// NOTE(review): this is token-intensive and open to any authenticated user;
// consider restricting it to administrators.
router.post("/embed/rebuild", authenticate, async (req, res) => {
  try {
    // Loaded on demand so the file's import list stays as it is.
    const { rebuildEmbeddings } = await import("./core.js");
    const result = await rebuildEmbeddings(req.userId);
    sendOk(res, result);
  } catch (err) {
    sendError(res, 500, ERR.INTERNAL, err.message);
  }
});

export default router;
30
1import { z } from "zod";
2import { findRelatedAtNode, getEmbedStatus, rebuildEmbeddings } from "./core.js";
3
const SERVER_INJECTED = "Injected by server. Ignore.";

/** Wrap a string in the text-content envelope every handler here returns. */
const textResult = (text) => ({ content: [{ type: "text", text }] });

/** Fresh schema entries for the fields each tool describes as server-injected. */
const injectedFields = () => ({
  userId: z.string().describe(SERVER_INJECTED),
  chatId: z.string().nullable().optional().describe(SERVER_INJECTED),
  sessionId: z.string().nullable().optional().describe(SERVER_INJECTED),
});

// Read-only lookup of notes similar to the latest text note at a node.
const relatedNotesTool = {
  name: "related-notes",
  description:
    "Find notes semantically similar to the content at this node. Scoped to the local neighborhood (parent subtree plus sibling branches) by default. Pass searchAll for land-wide.",
  schema: {
    nodeId: z.string().describe("The node to find related content for."),
    rootId: z.string().optional().describe("Tree root to search within. Auto-resolved if omitted."),
    searchAll: z.boolean().optional().default(false).describe("Search entire tree instead of scoped neighborhood."),
    ...injectedFields(),
  },
  annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false },
  async handler({ nodeId, rootId, searchAll, userId }) {
    let results;
    try {
      results = await findRelatedAtNode(nodeId, userId, rootId, searchAll);
    } catch (err) {
      return textResult(`Search failed: ${err.message}`);
    }

    if (results.length === 0) {
      return textResult("No semantically related notes found. Either no notes are embedded yet, or nothing passes the similarity threshold.");
    }

    const payload = {
      query: nodeId,
      relatedCount: results.length,
      results: results.map(({ nodeName, nodeId: hitNodeId, similarity, snippet }) => ({
        nodeName,
        nodeId: hitNodeId,
        similarity,
        snippet,
      })),
    };
    return textResult(JSON.stringify(payload, null, 2));
  },
};

// Read-only coverage report.
const embedStatusTool = {
  name: "embed-status",
  description: "Show embedding coverage. How many notes have vectors, what percentage of the total.",
  schema: injectedFields(),
  annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false },
  async handler() {
    try {
      const status = await getEmbedStatus();
      return textResult(JSON.stringify(status, null, 2));
    } catch (err) {
      return textResult(`Status failed: ${err.message}`);
    }
  },
};

// Re-embeds every text note; writes vectors and calls the embedding endpoint.
const embedRebuildTool = {
  name: "embed-rebuild",
  description: "Re-embed all text notes. Use after changing the embedding model. Token-intensive.",
  schema: injectedFields(),
  annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
  async handler({ userId }) {
    try {
      const result = await rebuildEmbeddings(userId);
      return textResult(JSON.stringify(result, null, 2));
    } catch (err) {
      return textResult(`Rebuild failed: ${err.message}`);
    }
  },
};

export default [relatedNotesTool, embedStatusTool, embedRebuildTool];
81
Loading comments...