embed - Canopy Horizon

EXTENSION for treeos-intelligence

embed

Every note gets a vector embedding when written. The tree structure is the skeleton. The embeddings are the magnetic field between bones. Two notes on opposite branches that are semantically related find each other without any explicit link. The tree hierarchy says these are far apart. The vector space says these mean the same thing. Three layers working together: the tree is navigation (parent, children, position), the graph is explicit connections (cascade, codebook, contributors), the vectors are implicit connections (nobody linked these two notes, but they are about the same thing). enrichContext injects related notes into the AI context. The AI at /Health/Fitness sees a semantically related note from /Health/Food about protein timing without either branch explicitly referencing the other. Per-viewer relevance when inverse-tree is installed. The tree holds structure. The vectors hold meaning. Together the tree knows not just where things are but what things are like each other. Navigation finds things by position. Embedding finds things by resonance.

v1.0.1 by TreeOS Site 0 downloads 5 files 640 lines 20.8 KB published 38d ago

treeos ext install embed

View changelog

Manifest

Provides

routes
tools
jobs
2 CLI commands

Requires

services: llm
models: Node, Note

Optional

extensions: inverse-tree

SHA256: 47a98a5f7a53edd3f7ff90ccb8366ab4dfc4eaea34b027e05f55a7710d21da7c

Dependents

1 package depends on this

Package	Type	Relationship
treeos-intelligence v1.0.2	bundle	includes

CLI Commands

Command	Method	Description
`related`	GET	Semantically similar notes at this position
`embed`	GET	Embedding status and management. Actions: status, rebuild.
`embed status`	GET	Embedding coverage percentage
`embed rebuild`	POST	Re-embed all notes

Hooks

Listens To

afterNote
enrichContext

Source Code

1/**
2 * Embed Core
3 *
4 * Vector embeddings for notes. Two storage modes:
5 * - Internal: vectors in MongoDB on the Note document metadata
6 * - External: vectors in a dedicated vector store (Pinecone, Qdrant, etc.)
7 *
8 * Cosine similarity search across the full tree.
9 */
10
11import log from "../../seed/log.js";
12import Node from "../../seed/models/node.js";
13import Note from "../../seed/models/note.js";
14import { SYSTEM_ROLE, CONTENT_TYPE } from "../../seed/protocol.js";
15import { getDescendantIds } from "../../seed/tree/treeFetch.js";
16
// Resolver for per-user LLM clients. Stays null until setServices() runs;
// generateEmbedding refuses to work while it is unset.
let _getClientForUser = null;

/**
 * Wire in the host services this module depends on.
 *
 * @param {object} services - must expose `getClientForUser`, which
 *   generateEmbedding later calls as `getClientForUser(userId, slot)`.
 */
export function setServices(services) {
  const { getClientForUser } = services;
  _getClientForUser = getClientForUser;
}
21
22// ─────────────────────────────────────────────────────────────────────────
23// CONFIG
24// ─────────────────────────────────────────────────────────────────────────
25
/**
 * Built-in embed settings. getEmbedConfig() layers the config node's `embed`
 * metadata on top of these, so every key can be overridden per land.
 *
 * Frozen so the shared baseline cannot be mutated by accident; callers only
 * ever receive spread copies of it.
 */
const DEFAULTS = Object.freeze({
  // Model slot passed to the LLM client resolver; null falls back to "main".
  embeddingModel: null,
  // Expected vector length. Not read by the code in this file.
  embeddingDimensions: 1536,
  // Minimum cosine similarity for a note to count as related.
  similarityThreshold: 0.75,
  // Default cap on the number of results findSimilar returns.
  maxRelatedNotes: 10,
  // Storage mode. Not read by the code in this file, which always writes
  // vectors onto the Note document.
  vectorStore: "internal",
  // Text is cut to this many characters before being sent for embedding.
  maxContentChars: 8000,
});
34
/**
 * Resolve the effective embed configuration.
 *
 * Reads the node whose systemRole is SYSTEM_ROLE.CONFIG and overlays its
 * `embed` metadata entry on DEFAULTS. The metadata may be a Map or a plain
 * object; both shapes are handled.
 *
 * @returns {Promise<object>} a fresh object: DEFAULTS plus any stored overrides
 */
export async function getEmbedConfig() {
  const configNode = await Node.findOne({ systemRole: SYSTEM_ROLE.CONFIG })
    .select("metadata")
    .lean();

  // No config node at all: plain copy of the defaults.
  if (!configNode) return { ...DEFAULTS };

  const { metadata } = configNode;
  let overrides;
  if (metadata instanceof Map) {
    overrides = metadata.get("embed") || {};
  } else {
    overrides = metadata?.embed || {};
  }

  return { ...DEFAULTS, ...overrides };
}
43
44// ─────────────────────────────────────────────────────────────────────────
45// EMBEDDING
46// ─────────────────────────────────────────────────────────────────────────
47
/**
 * Turn a piece of text into an embedding vector.
 *
 * The client comes from the injected `getClientForUser` service, using the
 * configured `embeddingModel` slot or "main" when none is configured. The
 * request goes through the client's OpenAI-style `embeddings.create` call.
 *
 * @param {string} text - content to embed; cut to `maxContentChars` first
 * @param {string} userId - user whose LLM connection is used
 * @returns {Promise<number[]|null>} the vector, or null when the text is
 *   empty/whitespace or the endpoint reports 404 / "not found"
 * @throws {Error} when the LLM service was never injected, when no client is
 *   available, or for any other embedding failure (including a response
 *   without embedding data)
 */
export async function generateEmbedding(text, userId) {
  if (!_getClientForUser) throw new Error("LLM service not available");
  if (!text || text.trim().length === 0) return null;

  const { maxContentChars, embeddingModel } = await getEmbedConfig();
  const slot = embeddingModel || "main";

  const { client, model } = await _getClientForUser(userId, slot);
  if (!client) throw new Error("No LLM connection available for embedding");

  const request = {
    model: model || "text-embedding-3-small",
    input: text.slice(0, maxContentChars),
  };

  try {
    const response = await client.embeddings.create(request);
    const embedding = response?.data?.[0]?.embedding;
    if (!embedding) {
      throw new Error("Embedding response missing data");
    }
    return embedding;
  } catch (err) {
    // Endpoints without embedding support are an expected condition:
    // log quietly and report "no vector" instead of failing the caller.
    const endpointMissing = err.status === 404 || err.message?.includes("not found");
    if (!endpointMissing) throw err;

    log.debug("Embed", `Embedding endpoint not available: ${err.message}`);
    return null;
  }
}
84
/**
 * Persist an embedding on a note.
 *
 * Writes the vector and an ISO-8601 timestamp under the note's
 * `metadata.embed` entry.
 *
 * @param {*} noteId - id of the note to update
 * @param {number[]} vector - embedding to store
 * @returns {Promise<void>}
 */
export async function storeVector(noteId, vector) {
  const embeddedAt = new Date().toISOString();
  const update = {
    $set: {
      "metadata.embed.vector": vector,
      "metadata.embed.embeddedAt": embeddedAt,
    },
  };
  await Note.findByIdAndUpdate(noteId, update);
}
96
/**
 * Embed one note and store the result on it.
 *
 * Only notes that exist, are of CONTENT_TYPE.TEXT, and have content are
 * embedded; everything else is skipped.
 *
 * @param {*} noteId - id of the note to embed
 * @param {string} userId - user whose LLM connection is used
 * @returns {Promise<number[]|null>} the stored vector, or null when the note
 *   was skipped or no vector could be generated
 */
export async function embedNote(noteId, userId) {
  const note = await Note.findById(noteId).select("content contentType").lean();

  const embeddable =
    Boolean(note) && note.contentType === CONTENT_TYPE.TEXT && Boolean(note.content);
  if (!embeddable) return null;

  const vector = await generateEmbedding(note.content, userId);
  if (vector) {
    await storeVector(noteId, vector);
    return vector;
  }
  return null;
}
110
111// ─────────────────────────────────────────────────────────────────────────
112// SIMILARITY SEARCH
113// ─────────────────────────────────────────────────────────────────────────
114
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector is
 *   missing, the lengths differ, or either magnitude is zero
 */
function cosineSimilarity(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length === b.length;
  if (!comparable) return 0;

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;

  let i = 0;
  while (i < a.length) {
    const x = a[i];
    const y = b[i];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    i += 1;
  }

  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (magnitude === 0) return 0;
  return dotProduct / magnitude;
}
129
/**
 * Build the set of node IDs a similarity search should cover.
 *
 * With `searchAll`, this is simply every id getDescendantIds reports for the
 * tree root. Otherwise the scope is the union of:
 *   1. the subtree under the ancestor reached by following at most two
 *      `parent` links up from `nodeId` (fewer if the chain ends earlier), and
 *   2. the root's direct children plus each of those children's direct
 *      children, so major cross-branch topics stay discoverable.
 *
 * Scoping keeps the number of vectors findSimilar has to load bounded by the
 * neighbourhood size instead of the whole land (the original author
 * estimated ~300MB for a full scan at 50K notes with 1536-dim vectors).
 *
 * @param {string} nodeId - the search origin
 * @param {string} rootId - the tree root
 * @param {boolean} searchAll - if true, search the entire tree
 * @returns {Promise<string[]>} node IDs to include in the search
 */
async function buildSearchScope(nodeId, rootId, searchAll) {
  if (searchAll) {
    return getDescendantIds(rootId);
  }

  const maxUp = 2;
  const scopeIds = new Set();

  // 1. Climb towards the grandparent. Each step depends on the previous
  //    lookup, so these queries are necessarily sequential.
  let cursor = nodeId;
  for (let depth = 0; cursor && depth < maxUp; depth++) {
    const current = await Node.findById(cursor).select("parent").lean();
    if (!current || !current.parent) break;
    cursor = current.parent.toString();
  }

  // cursor is now the scoping ancestor (grandparent or as high as we got).
  // String() keeps the Set deduplicating even if ids arrive as id objects.
  const subtreeIds = await getDescendantIds(cursor);
  for (const id of subtreeIds) scopeIds.add(String(id));

  // 2. Top-level branches and one level below them. The second level is
  //    fetched with a single $in query rather than one query per branch.
  const root = await Node.findById(rootId).select("children").lean();
  const topLevelIds = root?.children ?? [];
  if (topLevelIds.length > 0) {
    for (const childId of topLevelIds) scopeIds.add(childId.toString());

    const branches = await Node.find({ _id: { $in: topLevelIds } })
      .select("children")
      .lean();
    for (const branch of branches) {
      for (const grandchild of branch.children ?? []) {
        scopeIds.add(grandchild.toString());
      }
    }
  }

  return [...scopeIds];
}
187
/**
 * Find notes semantically similar to a query vector.
 *
 * Only embedded text notes on the nodes returned by buildSearchScope are
 * loaded and scored, so memory stays bounded by the scope rather than the
 * whole land.
 *
 * @param {number[]} queryVector
 * @param {string} rootId - tree root
 * @param {object} [opts]
 * @param {number} [opts.threshold] - minimum similarity; defaults to the
 *   configured `similarityThreshold`. An explicit 0 is honoured.
 * @param {number} [opts.maxResults] - result cap; defaults to `maxRelatedNotes`
 * @param {Array} [opts.excludeNoteIds] - note ids to leave out (string or id object)
 * @param {string} [opts.nodeId] - search origin; defaults to the root
 * @param {boolean} [opts.searchAll] - search the whole tree instead of the scope
 * @returns {Promise<Array<{ noteId, nodeId, nodeName, similarity, snippet }>>}
 *   matches sorted by similarity, highest first
 */
export async function findSimilar(queryVector, rootId, opts = {}) {
  const config = await getEmbedConfig();
  // `??` rather than `||`: a caller-supplied threshold of 0 is meaningful.
  const threshold = opts.threshold ?? config.similarityThreshold;
  const maxResults = opts.maxResults || config.maxRelatedNotes;

  // Ids are compared by their string form. Lean documents can carry id
  // objects, and a Set of objects matches by reference only, which would
  // let the "excluded" note through.
  const excludeIds = new Set((opts.excludeNoteIds || []).map(String));

  // Build the scoped node set
  const nodeIds = await buildSearchScope(opts.nodeId || rootId, rootId, opts.searchAll || false);

  // Load embedded notes only from scoped nodes
  const notes = await Note.find({
    nodeId: { $in: nodeIds },
    contentType: CONTENT_TYPE.TEXT,
    "metadata.embed.vector": { $exists: true },
  })
    .select("_id nodeId content metadata")
    .lean();

  // Score each note
  const scored = [];
  for (const note of notes) {
    if (excludeIds.has(String(note._id))) continue;

    const noteVector = note.metadata instanceof Map
      ? note.metadata.get("embed")?.vector
      : note.metadata?.embed?.vector;
    if (!noteVector) continue;

    const similarity = cosineSimilarity(queryVector, noteVector);
    if (similarity < threshold) continue;

    scored.push({
      noteId: note._id,
      nodeId: note.nodeId,
      similarity: Math.round(similarity * 1000) / 1000,
      // The query does not require content, so tolerate its absence.
      snippet: (note.content || "").slice(0, 200),
    });
  }

  // Sort by similarity descending, cap results
  scored.sort((a, b) => b.similarity - a.similarity);
  const results = scored.slice(0, maxResults);

  // Resolve node names with one query instead of one per result.
  if (results.length > 0) {
    const nodes = await Node.find({ _id: { $in: results.map((r) => r.nodeId) } })
      .select("name")
      .lean();
    const nameById = new Map(nodes.map((n) => [String(n._id), n.name]));
    for (const r of results) {
      r.nodeName = nameById.get(String(r.nodeId)) || r.nodeId;
    }
  }

  return results;
}
249
/**
 * Find notes related to the content at a node.
 *
 * The newest text note on the node supplies the query: its stored vector is
 * reused when present, otherwise one is generated on the fly (and not
 * stored). The search is scoped to the node's neighbourhood unless
 * `searchAll` is true.
 *
 * @param {string} nodeId - node whose latest text note drives the search
 * @param {string} userId - user whose LLM connection is used if a vector
 *   has to be generated
 * @param {string} [rootId] - tree root; resolved from the node when omitted
 * @param {boolean} [searchAll=false] - search the whole tree
 * @returns {Promise<Array>} findSimilar results, or [] when there is no
 *   usable note, no vector, or no resolvable root
 */
export async function findRelatedAtNode(nodeId, userId, rootId, searchAll = false) {
  const latest = await Note.findOne({ nodeId, contentType: CONTENT_TYPE.TEXT })
    .sort({ createdAt: -1 })
    .select("_id content metadata")
    .lean();
  if (!latest?.content) return [];

  // Prefer the vector already stored on the note; embed only as a fallback.
  const { metadata } = latest;
  const storedVector = metadata instanceof Map
    ? metadata.get("embed")?.vector
    : metadata?.embed?.vector;
  const queryVector = storedVector || (await generateEmbedding(latest.content, userId));
  if (!queryVector) return [];

  let treeRootId = rootId;
  if (!treeRootId) {
    try {
      const { resolveRootNode } = await import("../../seed/tree/treeFetch.js");
      const root = await resolveRootNode(nodeId);
      treeRootId = root?._id;
    } catch (err) {
      log.debug("Embed", "Root resolution failed:", err.message);
    }
  }
  if (!treeRootId) return [];

  const searchOptions = { excludeNoteIds: [latest._id], nodeId, searchAll };
  return findSimilar(queryVector, treeRootId, searchOptions);
}
296
297// ─────────────────────────────────────────────────────────────────────────
298// STATUS
299// ─────────────────────────────────────────────────────────────────────────
300
/**
 * Report how many text notes carry an embedding.
 *
 * The two counts are independent, so they are issued together rather than
 * one after the other.
 *
 * @returns {Promise<{ totalTextNotes: number, embeddedNotes: number, coveragePercent: number }>}
 *   `coveragePercent` is rounded to one decimal place and is 0 when there
 *   are no text notes
 */
export async function getEmbedStatus() {
  const textFilter = { contentType: CONTENT_TYPE.TEXT };

  const [totalNotes, embeddedNotes] = await Promise.all([
    Note.countDocuments(textFilter),
    Note.countDocuments({ ...textFilter, "metadata.embed.vector": { $exists: true } }),
  ]);

  // Scale by 1000 then divide by 10 to keep a single decimal place.
  const coverage = totalNotes > 0 ? Math.round((embeddedNotes / totalNotes) * 1000) / 10 : 0;

  return {
    totalTextNotes: totalNotes,
    embeddedNotes,
    coveragePercent: coverage,
  };
}
319
320// ─────────────────────────────────────────────────────────────────────────
321// REBUILD
322// ─────────────────────────────────────────────────────────────────────────
323
/**
 * Re-embed every text note, e.g. after switching the embedding model.
 *
 * All text notes are loaded up front and embedded one at a time. Notes with
 * empty or whitespace-only content are skipped and counted as neither
 * embedded nor failed. A note counts as failed when no vector comes back or
 * when embedding/storing throws; failures never abort the run.
 *
 * @param {string} userId - user whose LLM connection is used
 * @param {function} [onProgress] - called with `{ embedded, failed, total }`
 *   whenever the number of processed notes reaches a multiple of 50
 * @returns {Promise<{ embedded: number, failed: number, total: number }>}
 *   `total` is the number of text notes loaded, skipped ones included
 */
export async function rebuildEmbeddings(userId, onProgress) {
  const notes = await Note.find({ contentType: CONTENT_TYPE.TEXT })
    .select("_id content")
    .lean();

  const total = notes.length;
  const tally = { embedded: 0, failed: 0 };

  for (const { _id, content } of notes) {
    const isBlank = !content || content.trim().length === 0;
    if (isBlank) continue;

    try {
      const vector = await generateEmbedding(content, userId);
      if (vector) {
        await storeVector(_id, vector);
        tally.embedded += 1;
      } else {
        tally.failed += 1;
      }
    } catch (err) {
      tally.failed += 1;
      log.debug("Embed", `Rebuild failed for note ${_id}: ${err.message}`);
    }

    const processed = tally.embedded + tally.failed;
    if (onProgress && processed % 50 === 0) {
      onProgress({ ...tally, total });
    }
  }

  log.verbose("Embed", `Rebuild complete: ${tally.embedded} embedded, ${tally.failed} failed out of ${total}`);
  return { ...tally, total };
}
360

1import log from "../../seed/log.js";
2import tools from "./tools.js";
3import { setServices, embedNote, findSimilar, getEmbedConfig } from "./core.js";
4import { CONTENT_TYPE } from "../../seed/protocol.js";
5
/**
 * Extension entry point.
 *
 * Injects the LLM client resolver into the embed core, registers the
 * `afterNote` and `enrichContext` hooks under the "embed" owner, and returns
 * the router, tool definitions and public exports of the extension.
 *
 * @param {object} core - host API; this function reads
 *   `core.llm.getClientForUser`, `core.hooks.register`, `core.models.Node`
 *   and `core.models.Note`
 * @returns {Promise<{ router, tools, exports: object }>}
 */
export async function init(core) {
  setServices({
    getClientForUser: core.llm.getClientForUser,
  });

  // ── afterNote: embed every new or edited text note ──────────────────
  core.hooks.register("afterNote", async ({ note, nodeId, userId, contentType, action }) => {
    if (contentType !== CONTENT_TYPE.TEXT) return;
    if (action !== "create" && action !== "edit") return;
    if (!userId || userId === "SYSTEM") return;

    // Without an id there is nothing to embed; also guards a missing `note`.
    const noteId = note?._id || note?.id;
    if (!noteId) return;

    // Skip notes that live on system nodes.
    try {
      const node = await core.models.Node.findById(nodeId).select("systemRole").lean();
      if (node?.systemRole) return;
    } catch (err) {
      // Best effort: if the node cannot be checked, skip embedding, but
      // leave a trace instead of failing silently.
      log.debug("Embed", `System node check failed for ${nodeId}: ${err.message}`);
      return;
    }

    // Embed in background, don't block the note write.
    embedNote(noteId, userId).catch((err) => {
      log.debug("Embed", `Background embedding failed for note at ${nodeId}: ${err.message}`);
    });
  }, "embed");

  // ── enrichContext: inject semantically related notes ────────────────
  core.hooks.register("enrichContext", async ({ context, node, meta, userId }) => {
    if (!userId) return;
    if (node.systemRole) return;

    // The search is expensive, so it only runs when the supplied meta has
    // an `embed` entry. `meta` itself may be absent.
    if (!meta?.embed) return;

    // Find the tree root: a node with `rootOwner` is treated as the root.
    let rootId;
    if (node.rootOwner) {
      rootId = node._id;
    } else {
      try {
        const { resolveRootNode } = await import("../../seed/tree/treeFetch.js");
        const root = await resolveRootNode(node._id);
        rootId = root?._id;
      } catch (err) {
        log.debug("Embed", `Root resolution failed during enrichContext: ${err.message}`);
        return;
      }
    }
    if (!rootId) return;

    // Enrichment is optional: a failure anywhere below is logged and the
    // context is left untouched.
    try {
      // The most recent embedded text note at this node supplies the query.
      const recentNote = await core.models.Note.findOne({
        nodeId: node._id,
        contentType: CONTENT_TYPE.TEXT,
        "metadata.embed.vector": { $exists: true },
      })
        .sort({ createdAt: -1 })
        .select("_id metadata")
        .lean();
      if (!recentNote) return;

      const vector = recentNote.metadata instanceof Map
        ? recentNote.metadata.get("embed")?.vector
        : recentNote.metadata?.embed?.vector;
      if (!vector) return;

      const config = await getEmbedConfig();
      const related = await findSimilar(vector, rootId, {
        maxResults: 5,
        threshold: config.similarityThreshold,
        excludeNoteIds: [recentNote._id],
        nodeId: node._id,
      });

      if (related.length > 0) {
        context.relatedNotes = related.map((r) => ({
          nodeName: r.nodeName,
          similarity: r.similarity,
          snippet: r.snippet,
        }));
      }
    } catch (err) {
      log.debug("Embed", "Related notes enrichment failed:", err.message);
    }
  }, "embed");

  const { default: router } = await import("./routes.js");
  // One lookup of the core module for the exports not statically imported.
  const { findRelatedAtNode, generateEmbedding } = await import("./core.js");

  return {
    router,
    tools,
    exports: {
      embedNote,
      findSimilar,
      findRelatedAtNode,
      generateEmbedding,
    },
  };
}
106

// Long-form description shown by the extension registry. Kept as chunks so
// the lines stay readable; the chunks join without a separator.
const description = [
  "Every note gets a vector embedding when written. The tree structure is the skeleton. The ",
  "embeddings are the magnetic field between bones. Two notes on opposite branches that are ",
  "semantically related find each other without any explicit link. The tree hierarchy says ",
  "these are far apart. The vector space says these mean the same thing. Three layers working ",
  "together: the tree is navigation (parent, children, position), the graph is explicit ",
  "connections (cascade, codebook, contributors), the vectors are implicit connections (nobody ",
  "linked these two notes, but they are about the same thing). enrichContext injects related ",
  "notes into the AI context. The AI at /Health/Fitness sees a semantically related note from ",
  "/Health/Food about protein timing without either branch explicitly referencing the other. ",
  "Per-viewer relevance when inverse-tree is installed. The tree holds structure. The vectors ",
  "hold meaning. Together the tree knows not just where things are but what things are like ",
  "each other. Navigation finds things by position. Embedding finds things by resonance.",
].join("");

// `related`: semantic neighbours of the current tree position.
const relatedCommand = {
  command: "related",
  scope: ["tree"],
  description: "Semantically similar notes at this position",
  method: "GET",
  endpoint: "/node/:nodeId/related",
};

// `embed [action]`: the bare command maps to the status endpoint; the
// subcommands select status or rebuild explicitly.
const embedCommand = {
  command: "embed [action]",
  description: "Embedding status and management. Actions: status, rebuild.",
  method: "GET",
  endpoint: "/embed/status",
  subcommands: {
    "status": { method: "GET", endpoint: "/embed/status", description: "Embedding coverage percentage" },
    "rebuild": { method: "POST", endpoint: "/embed/rebuild", description: "Re-embed all notes" },
  },
};

/**
 * Extension manifest for `embed`: identity, required and optional
 * dependencies, and everything the extension provides (routes, tools, jobs,
 * CLI commands, hooks).
 */
const manifest = {
  name: "embed",
  version: "1.0.1",
  builtFor: "treeos-intelligence",
  description,

  needs: {
    services: ["llm"],
    models: ["Node", "Note"],
  },

  optional: {
    extensions: ["inverse-tree"],
  },

  provides: {
    models: {},
    routes: "./routes.js",
    tools: true,
    jobs: true,
    orchestrator: false,
    energyActions: {},
    sessionTypes: {},
    env: [],

    cli: [relatedCommand, embedCommand],

    hooks: {
      fires: [],
      listens: ["afterNote", "enrichContext"],
    },
  },
};

export default manifest;
63

1import express from "express";
2import authenticate from "../../seed/middleware/authenticate.js";
3import { sendOk, sendError, ERR } from "../../seed/protocol.js";
4import { findRelatedAtNode, getEmbedStatus } from "./core.js";
5
// HTTP surface of the embed extension. Every route requires authentication
// and reports failures as a 500 carrying the error message.
const router = express.Router();

// GET /node/:nodeId/related - semantically similar notes
// Query: `all=true|1` searches the whole tree; `rootId` pins the tree root
// (resolved from the node when omitted).
router.get("/node/:nodeId/related", authenticate, async (req, res) => {
  try {
    const searchAll = req.query.all === "true" || req.query.all === "1";
    const results = await findRelatedAtNode(req.params.nodeId, req.userId, req.query.rootId || null, searchAll);
    sendOk(res, { count: results.length, results });
  } catch (err) {
    sendError(res, 500, ERR.INTERNAL, err.message);
  }
});

// GET /embed/status - coverage stats
router.get("/embed/status", authenticate, async (req, res) => {
  try {
    const status = await getEmbedStatus();
    sendOk(res, status);
  } catch (err) {
    sendError(res, 500, ERR.INTERNAL, err.message);
  }
});

// POST /embed/rebuild - re-embed every text note
// The manifest's `embed rebuild` CLI subcommand targets this endpoint; without
// a handler here that command could only ever get a 404. The request stays
// open until the rebuild finishes and responds with the
// { embedded, failed, total } summary.
router.post("/embed/rebuild", authenticate, async (req, res) => {
  try {
    // Loaded on demand so this route does not depend on a change to the
    // file's static import list.
    const { rebuildEmbeddings } = await import("./core.js");
    const result = await rebuildEmbeddings(req.userId);
    sendOk(res, result);
  } catch (err) {
    sendError(res, 500, ERR.INTERNAL, err.message);
  }
});

export default router;
30

1import { z } from "zod";
2import { findRelatedAtNode, getEmbedStatus, rebuildEmbeddings } from "./core.js";
3
/** Wrap plain text in the single-item text result shape every tool returns. */
const textResult = (text) => ({ content: [{ type: "text", text }] });

/**
 * Fresh schema entries for the three fields the server injects into every
 * tool call. A factory keeps each tool's schema objects independent.
 */
const injectedFields = () => ({
  userId: z.string().describe("Injected by server. Ignore."),
  chatId: z.string().nullable().optional().describe("Injected by server. Ignore."),
  sessionId: z.string().nullable().optional().describe("Injected by server. Ignore."),
});

// related-notes: semantic neighbours of a node's content (read-only).
const relatedNotesTool = {
  name: "related-notes",
  description:
    "Find notes semantically similar to the content at this node. Scoped to the local neighborhood (parent subtree plus sibling branches) by default. Pass searchAll for land-wide.",
  schema: {
    nodeId: z.string().describe("The node to find related content for."),
    rootId: z.string().optional().describe("Tree root to search within. Auto-resolved if omitted."),
    searchAll: z.boolean().optional().default(false).describe("Search entire tree instead of scoped neighborhood."),
    ...injectedFields(),
  },
  annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false },
  handler: async ({ nodeId, rootId, searchAll, userId }) => {
    try {
      const results = await findRelatedAtNode(nodeId, userId, rootId, searchAll);
      if (results.length === 0) {
        return textResult("No semantically related notes found. Either no notes are embedded yet, or nothing passes the similarity threshold.");
      }

      // Only the fields useful to the model are forwarded.
      const payload = {
        query: nodeId,
        relatedCount: results.length,
        results: results.map((r) => ({
          nodeName: r.nodeName,
          nodeId: r.nodeId,
          similarity: r.similarity,
          snippet: r.snippet,
        })),
      };
      return textResult(JSON.stringify(payload, null, 2));
    } catch (err) {
      return textResult(`Search failed: ${err.message}`);
    }
  },
};

// embed-status: embedding coverage numbers (read-only).
const embedStatusTool = {
  name: "embed-status",
  description: "Show embedding coverage. How many notes have vectors, what percentage of the total.",
  schema: injectedFields(),
  annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: false },
  handler: async () => {
    try {
      const status = await getEmbedStatus();
      return textResult(JSON.stringify(status, null, 2));
    } catch (err) {
      return textResult(`Status failed: ${err.message}`);
    }
  },
};

// embed-rebuild: re-embeds every text note on behalf of the calling user.
const embedRebuildTool = {
  name: "embed-rebuild",
  description: "Re-embed all text notes. Use after changing the embedding model. Token-intensive.",
  schema: injectedFields(),
  annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: true },
  handler: async ({ userId }) => {
    try {
      const result = await rebuildEmbeddings(userId);
      return textResult(JSON.stringify(result, null, 2));
    } catch (err) {
      return textResult(`Rebuild failed: ${err.message}`);
    }
  },
};

export default [relatedNotesTool, embedStatusTool, embedRebuildTool];
81

Versions

Version	Published	Downloads
1.0.1	38d ago	0
1.0.0	48d ago	0

⭐ 0 stars

⚑ 0 flags

React from the CLI: treeos ext star embed

Comments

Loading comments...

Post comments from the CLI: treeos ext comment embed "your comment"
Max 3 comments per extension. One star and one flag per user.