Last active
December 18, 2025 20:15
-
-
Save dimitrisli/2dfe5d07d8d6aaabf76b9f6f4ba8d883 to your computer and use it in GitHub Desktop.
LocalEmbeddingModel
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package com.example.ai.embeddings; | |
import java.util.*;
import java.util.regex.Pattern;

import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.AbstractEmbeddingModel;
import org.springframework.ai.embedding.Embedding;
import org.springframework.ai.embedding.EmbeddingRequest;
import org.springframework.ai.embedding.EmbeddingResponse;
import org.springframework.stereotype.Component;
| @Component | |
| public class LocalEmbeddingModel extends AbstractEmbeddingModel { | |
| private final List<String> vocabulary; | |
| private final Map<String, Integer> indexByToken; | |
| public LocalEmbeddingModel() { | |
| this.vocabulary = List.of( | |
| "order", "customer", "invoice", "payment", "amount", | |
| "status", "date", "product", "price", "quantity" | |
| ); | |
| this.indexByToken = new HashMap<>(); | |
| for (int i = 0; i < vocabulary.size(); i++) { | |
| indexByToken.put(vocabulary.get(i), i); | |
| } | |
| } | |
| @Override | |
| public EmbeddingResponse call(EmbeddingRequest request) { | |
| List<String> texts = request.getInstructions(); | |
| List<Embedding> embeddings = new ArrayList<>(texts.size()); | |
| for (int i = 0; i < texts.size(); i++) { | |
| embeddings.add(embedText(texts.get(i), i)); | |
| } | |
| return new EmbeddingResponse(embeddings); | |
| } | |
| @Override | |
| public float[] embed(Document document) { | |
| // Used by SimpleVectorStore when adding Documents directly | |
| return embedText(document.getContent(), 0).getEmbedding(); | |
| } | |
| private Embedding embedText(String text, int index) { | |
| float[] vec = new float[vocabulary.size()]; | |
| if (text != null) { | |
| String[] tokens = tokenize(text); | |
| for (String token : tokens) { | |
| Integer idx = indexByToken.get(token); | |
| if (idx != null) { | |
| vec[idx] += 1.0f; | |
| } | |
| } | |
| } | |
| normalize(vec); | |
| return new Embedding(vec, index); | |
| } | |
| private String[] tokenize(String text) { | |
| return text.toLowerCase(Locale.ROOT) | |
| .replaceAll("[^a-z0-9 ]", " ") | |
| .split("\\s+"); | |
| } | |
| private void normalize(float[] v) { | |
| float sumSq = 0.0f; | |
| for (float x : v) { | |
| sumSq += x * x; | |
| } | |
| if (sumSq == 0.0f) return; | |
| float norm = (float) Math.sqrt(sumSq); | |
| for (int i = 0; i < v.length; i++) { | |
| v[i] /= norm; | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment