Skip to content

Instantly share code, notes, and snippets.

@mikaelj
Created February 7, 2026 21:05
Show Gist options
  • Select an option

  • Save mikaelj/2426f47ed0757fa77c0b8e7e97cb3b90 to your computer and use it in GitHub Desktop.

Select an option

Save mikaelj/2426f47ed0757fa77c0b8e7e97cb3b90 to your computer and use it in GitHub Desktop.
diff --git a/package.json b/package.json
index 068da06..4893b5e 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,8 @@
"start": "node dist/index.js",
"dev": "tsx src/index.ts",
"test": "vitest run",
- "prepublishOnly": "npm run build"
+ "prepublishOnly": "npm run build",
+ "fix-embeddings": "tsx fix-embeddings.js"
},
"keywords": [
"mcp",
diff --git a/src/db.ts b/src/db.ts
index 0e5a4ec..8be2761 100644
--- a/src/db.ts
+++ b/src/db.ts
@@ -269,13 +269,16 @@ export class KnowledgeDB {
getAllActiveNodesWithEmbeddings(): Array<
Pick<NodeRow, "id" | "name" | "kind" | "summary" | "embedding">
> {
- return this.db
+ const rows = this.db
.prepare(
"SELECT id, name, kind, summary, embedding FROM nodes WHERE removed_at IS NULL AND embedding IS NOT NULL"
)
- .all() as Array<
- Pick<NodeRow, "id" | "name" | "kind" | "summary" | "embedding">
- >;
+ .all() as Array<any>;
+
+ return rows.map((row) => ({
+ ...row,
+ embedding: row.embedding ? Buffer.from(row.embedding) : null
+ })) as Array<Pick<NodeRow, "id" | "name" | "kind" | "summary" | "embedding">>;
}
nodeExists(id: string): boolean {
diff --git a/src/embeddings.ts b/src/embeddings.ts
index fc52ab4..677941c 100644
--- a/src/embeddings.ts
+++ b/src/embeddings.ts
@@ -18,14 +18,28 @@ async function getExtractor(): Promise<FeatureExtractionPipeline> {
}
/**
- * Generate an embedding vector for the given text.
+ * Generate an embedding vector for the given text.
* Returns a Buffer containing float32 values.
*/
export async function embed(text: string): Promise<Buffer> {
+ // Validate input text
+ if (!text || text.trim().length === 0) {
+ throw new Error(`Cannot embed empty text`);
+ }
+
const ext = await getExtractor();
const output = await ext(text, { pooling: "mean", normalize: true });
const data = output.data as Float32Array;
- return Buffer.from(data.buffer, data.byteOffset, data.byteLength);
+ const buffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength);
+
+ // Validate embedding has correct dimensions
+ if (buffer.length !== EMBEDDING_DIM * Float32Array.BYTES_PER_ELEMENT) {
+ throw new Error(
+ `Invalid embedding dimension: expected ${EMBEDDING_DIM} floats (${EMBEDDING_DIM * Float32Array.BYTES_PER_ELEMENT} bytes), got ${data.length} floats (${buffer.length} bytes). Text: "${text.substring(0, 50)}..."`
+ );
+ }
+
+ return buffer;
}
/**
@@ -91,7 +105,7 @@ export function findTopK(
topK: number
): Array<{ id: string; similarity: number }> {
const scored = candidates
- .filter((c) => c.embedding !== null)
+ .filter((c) => c.embedding !== null && c.embedding.length > 0)
.map((c) => ({
id: c.id,
similarity: cosineSimilarity(queryEmbedding, c.embedding!),
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment