Last active
December 12, 2025 16:07
-
-
Save frutik/c0ab8b5e4362cd4b0c0a23a20ccc8b89 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Levenshtein distance between two strings. | |
| * - O(m*n) time | |
| * - O(min(m,n)) memory | |
| */ | |
| function levenshtein(a, b) { | |
| if (a === b) return 0; | |
| if (!a) return b.length; | |
| if (!b) return a.length; | |
| // Ensure b is the shorter string to use less memory | |
| if (a.length < b.length) [a, b] = [b, a]; | |
| const n = b.length; | |
| let prev = new Array(n + 1); | |
| let curr = new Array(n + 1); | |
| for (let j = 0; j <= n; j++) prev[j] = j; | |
| for (let i = 1; i <= a.length; i++) { | |
| curr[0] = i; | |
| const ai = a.charCodeAt(i - 1); | |
| for (let j = 1; j <= n; j++) { | |
| const cost = ai === b.charCodeAt(j - 1) ? 0 : 1; | |
| const del = prev[j] + 1; // delete from a | |
| const ins = curr[j - 1] + 1; // insert into a | |
| const sub = prev[j - 1] + cost; // substitute | |
| curr[j] = Math.min(del, ins, sub); | |
| } | |
| [prev, curr] = [curr, prev]; | |
| } | |
| return prev[n]; | |
| } | |
| console.log(levenshtein("kitten", "sitting")); // 3 | |
| function normalizedLevenshtein(a, b) { | |
| const dist = levenshtein(a, b); | |
| return dist / Math.max(a.length, b.length); | |
| } | |
| console.log(normalizedLevenshtein("kitten", "sitting")); // 0.42857142857142855 | |
| const k = 10; | |
| let seen = []; // store previous names | |
| let total = 0; // sum of distances | |
| let count = 0; // number of pairs | |
| eachDoc(function (doc, i) { | |
| const name = doc.name; | |
| if (!name) return; | |
| for (let j = 0; j < seen.length; j++) { | |
| const other = seen[j]; | |
| const lev = levenshtein(name, other); | |
| const norm = lev / Math.max(name.length, other.length); | |
| total += norm; | |
| count++; | |
| } | |
| seen.push(name); | |
| }, k); | |
| const score = count > 0 ? total / count : 0.0; | |
| setScore(score); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment