Created
February 7, 2026 21:05
-
-
Save mikaelj/2426f47ed0757fa77c0b8e7e97cb3b90 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/package.json b/package.json | |
| index 068da06..4893b5e 100644 | |
| --- a/package.json | |
| +++ b/package.json | |
| @@ -18,7 +18,8 @@ | |
| "start": "node dist/index.js", | |
| "dev": "tsx src/index.ts", | |
| "test": "vitest run", | |
| - "prepublishOnly": "npm run build" | |
| + "prepublishOnly": "npm run build", | |
| + "fix-embeddings": "tsx fix-embeddings.js" | |
| }, | |
| "keywords": [ | |
| "mcp", | |
| diff --git a/src/db.ts b/src/db.ts | |
| index 0e5a4ec..8be2761 100644 | |
| --- a/src/db.ts | |
| +++ b/src/db.ts | |
| @@ -269,13 +269,16 @@ export class KnowledgeDB { | |
| getAllActiveNodesWithEmbeddings(): Array< | |
| Pick<NodeRow, "id" | "name" | "kind" | "summary" | "embedding"> | |
| > { | |
| - return this.db | |
| + const rows = this.db | |
| .prepare( | |
| "SELECT id, name, kind, summary, embedding FROM nodes WHERE removed_at IS NULL AND embedding IS NOT NULL" | |
| ) | |
| - .all() as Array< | |
| - Pick<NodeRow, "id" | "name" | "kind" | "summary" | "embedding"> | |
| - >; | |
| + .all() as Array<any>; | |
| + | |
| + return rows.map((row) => ({ | |
| + ...row, | |
| + embedding: row.embedding ? Buffer.from(row.embedding) : null | |
| + })) as Array<Pick<NodeRow, "id" | "name" | "kind" | "summary" | "embedding">>; | |
| } | |
| nodeExists(id: string): boolean { | |
| diff --git a/src/embeddings.ts b/src/embeddings.ts | |
| index fc52ab4..677941c 100644 | |
| --- a/src/embeddings.ts | |
| +++ b/src/embeddings.ts | |
| @@ -18,14 +18,28 @@ async function getExtractor(): Promise<FeatureExtractionPipeline> { | |
| } | |
| /** | |
| - * Generate an embedding vector for the given text. | |
| + * Generate an embedding vector for the given text. | |
| * Returns a Buffer containing float32 values. | |
| */ | |
| export async function embed(text: string): Promise<Buffer> { | |
| + // Validate input text | |
| + if (!text || text.trim().length === 0) { | |
| + throw new Error(`Cannot embed empty text`); | |
| + } | |
| + | |
| const ext = await getExtractor(); | |
| const output = await ext(text, { pooling: "mean", normalize: true }); | |
| const data = output.data as Float32Array; | |
| - return Buffer.from(data.buffer, data.byteOffset, data.byteLength); | |
| + const buffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength); | |
| + | |
| + // Validate embedding has correct dimensions | |
| + if (buffer.length !== EMBEDDING_DIM * Float32Array.BYTES_PER_ELEMENT) { | |
| + throw new Error( | |
| + `Invalid embedding dimension: expected ${EMBEDDING_DIM} floats (${EMBEDDING_DIM * Float32Array.BYTES_PER_ELEMENT} bytes), got ${data.length} floats (${buffer.length} bytes). Text: "${text.substring(0, 50)}..."` | |
| + ); | |
| + } | |
| + | |
| + return buffer; | |
| } | |
| /** | |
| @@ -91,7 +105,7 @@ export function findTopK( | |
| topK: number | |
| ): Array<{ id: string; similarity: number }> { | |
| const scored = candidates | |
| - .filter((c) => c.embedding !== null) | |
| + .filter((c) => c.embedding !== null && c.embedding.length > 0) | |
| .map((c) => ({ | |
| id: c.id, | |
| similarity: cosineSimilarity(queryEmbedding, c.embedding!), |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment