Created
November 29, 2025 13:35
-
-
Save mrenouf/c464c624652b59c003f370706f10a984 to your computer and use it in GitHub Desktop.
Kotlin SuffixTree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package com.bitgrind.common.trees | |
| import java.util.* | |
| class SuffixTree { | |
| private class SuffixRef(val word: Int, val offset: Int) | |
| private class Edge(val word: Int, var first: Int, val last: Int, var node: Node) { | |
| val length: Int | |
| get() = (last - first) + 1 | |
| fun first(words: List<String>): Char = words[word][first] | |
| fun prefix(n: Int) = Edge(word, first, minOf(last, first + (n - 1)), node) | |
| fun suffix(n: Int) = Edge(word, minOf(last, first + n), last, node) | |
| fun setNode(newNode: Node): Edge { | |
| node = newNode | |
| return this | |
| } | |
| } | |
| private class Node { | |
| val edges: MutableMap<Char, Edge> = hashMapOf() | |
| var suffixes: MutableList<SuffixRef> = LinkedList() | |
| private fun commonPrefixLength(words: List<String>, edge: Edge, value: String): Int { | |
| val w = words[edge.word] | |
| for (i in 0 until minOf(edge.length, value.length)) { | |
| val c = w[edge.first + i].compareTo(value[i]) | |
| if (c != 0) { | |
| return i | |
| } | |
| } | |
| return minOf(edge.length, value.length) | |
| } | |
| private fun commonPrefixLength(words: List<String>, edge: Edge, other: Edge): Int { | |
| val u = words[edge.word] | |
| val v = words[other.word] | |
| for (i in 0 until minOf(edge.length, other.length)) { | |
| val c = u[edge.first + i].compareTo(v[other.first + i]) | |
| if (c != 0) { | |
| return i | |
| } | |
| } | |
| return minOf(edge.length, other.length) | |
| } | |
| private fun edgesEqual(words: List<String>, edge: Edge, other: Edge): Boolean { | |
| if (edge.length != other.length) return false | |
| val u = words[edge.word] | |
| val v = words[other.word] | |
| for (i in 0 until edge.length) { | |
| if (u[edge.first + i].compareTo(v[other.first + i]) != 0) { | |
| return false | |
| } | |
| } | |
| return true | |
| } | |
| fun collectSuffixes(): List<SuffixRef> { | |
| val result = mutableListOf<SuffixRef>() | |
| val stack = ArrayDeque<Node>() | |
| stack.add(this) | |
| while (stack.isNotEmpty()) { | |
| val cur = stack.removeLast() | |
| result.addAll(cur.suffixes) | |
| for (edge in cur.edges.values) { | |
| stack.add(edge.node) | |
| } | |
| } | |
| return result | |
| } | |
| fun find(words: List<String>, substring: String): Node? { | |
| val firstChar = substring.first() | |
| val currEdge = edges[firstChar] | |
| if (currEdge != null) { | |
| val prefixLength = commonPrefixLength(words, currEdge, substring) | |
| if (prefixLength == substring.length) { | |
| return currEdge.node | |
| } | |
| if (prefixLength > 0) { | |
| return currEdge.node.find(words, substring.drop(prefixLength)) | |
| } | |
| } | |
| return null | |
| } | |
| fun addEdge(words: List<String>, newEdge: Edge): Node { | |
| val firstChar = newEdge.first(words) | |
| val currEdge = edges[firstChar] | |
| if (currEdge == null) { | |
| edges[firstChar] = newEdge | |
| return newEdge.node | |
| } | |
| if (edgesEqual(words, newEdge, currEdge)) { | |
| return currEdge.node | |
| } | |
| val prefixLength = commonPrefixLength(words, newEdge, currEdge) | |
| val commonEdge: Edge | |
| if (currEdge.length > prefixLength) { | |
| commonEdge = currEdge.prefix(prefixLength).apply { node = Node() } | |
| edges[firstChar] = commonEdge | |
| commonEdge.node.addEdge(words, currEdge.suffix(prefixLength)) | |
| } else { | |
| commonEdge = currEdge | |
| } | |
| if (newEdge.length > commonEdge.length) { | |
| return commonEdge.node.addEdge(words, newEdge.suffix(prefixLength)) | |
| } | |
| return commonEdge.node | |
| } | |
| } | |
| val words = mutableListOf<String>() | |
| private val root = Node() | |
| fun String.parenthesize(open: Int, close: Int): String { | |
| return substring(0, open) + "(" + substring(open, close) + ")" + substring(close) | |
| } | |
| fun find(substring: String): List<String> { | |
| val match = root.find(words, substring.uppercase()) | |
| val suffixList = match?.collectSuffixes() ?: emptyList() | |
| return suffixList.sortedBy { it.word }.map { suffix -> | |
| words[suffix.word].parenthesize(suffix.offset, suffix.offset + substring.length) | |
| } | |
| } | |
| fun add(word: String) { | |
| words.add(word) | |
| for (i in word.indices) { | |
| val node = root.addEdge(words, Edge(words.lastIndex, i, word.lastIndex, Node())) | |
| node.suffixes.add(SuffixRef(word = words.lastIndex, offset = i)) | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment