Created
October 28, 2018 02:54
-
-
Save andrewerb/7059c5a24a7688111ef9e99db15a4d62 to your computer and use it in GitHub Desktop.
Pokémon Name Autocomplete, in Python 3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """Pokemon Autocomplete | |
| A Python 3 Trie-object based autocomplete implementation. For Searching/completing names of Pokemon. | |
| (Pokemon 1-251 in this implementation). | |
| A web executable version of this program is available at: https://repl.it/@andrewerb/pkmnautosuggest | |
| """ | |
| __author__ = "Andrew Erb" | |
| from time import sleep | |
| ## CLASSES | |
| ## Trie object stuff: | |
| class Node: #TrieNode | |
| """Trie Node class | |
| @Attributes: | |
| * label: A letter-label | |
| * data: An optional Data value: Used if the node corresponds with the end of a full-length word; helpful for overlapping words, ie 'Paras' and 'Parasect'. Keep as None for letters that aren't the end of a word path. | |
| * children: A dictionary of child nodes (used by the Trie to store the next letter in a word) | |
| """ | |
| def __init__(self, label=None, data=None): | |
| """Node Class Constructor | |
| Each node has a label, optional data value, and dictionary of child nodes. | |
| """ | |
| self.label = label | |
| self.data = data #For nodes at the end of a full word/term, initialized as None | |
| self.children = dict() | |
| def addChild(self, key, data=None): | |
| """New key param represents the key-value of the child-node being appended to the current node.""" | |
| if not isinstance(key, Node) in self.children: | |
| self.children[key] = Node(key, data) | |
| else: | |
| self.children[key].label = key | |
| # redundant label checking while setting a child node | |
| def __getitem__(self, key): | |
| """Special method. Yields a child-node of this node, at specified key. | |
| Syntax: this_node[a_key] | |
| Yields: this_node.children[key] (a Node value) | |
| #Note: I no longer remember why I added this :) | |
| """ | |
| return self.children[key] | |
| def __del__(self): | |
| """*TODO""" | |
| pass | |
| class Trie: | |
| """Trie data-structure implementation, comprised of node-objects. | |
| * Node objects each hold a dictionary of child nodes. | |
| * Each key,value pair is a character to a node matching that label. | |
| * A given char's node contains a dictionary of corresponding child-nodes. More char-labelled key,value pairs. | |
| * Node traversals create a chain of strings. That complete string/word is stored in data in the last node of that word's traversal. | |
| TODO: | |
| * FIX actual matches (currently returns an incomplete list) | |
| * print_list method | |
| * print_trie_structure method | |
| * append words AND lists ?? | |
| * __del__ ? | |
| """ | |
| def __init__(self): | |
| """Trie class constructor. Sets up Trie with root Node | |
| TODO: Consider optional args for intializing/setup. String or list of strings, etc. | |
| """ | |
| self.root = Node() | |
| def __format_input(self, word=str("")): | |
| """String formatting to lowercase. | |
| All letters in the Trie are kept lowercase. For basically case-insensitive searches. | |
| Args: | |
| word (str): String being formatted to lower-case. | |
| Returns: | |
| Lowercase version of string-input, if input isn't None. | |
| """ | |
| if word is None: | |
| return None | |
| else: | |
| return (str(word.lower())) | |
| def __getitem__(self, key): # FIX ### | |
| return self.root.children[key] | |
| def add_word(self, word): | |
| """Add a new word to the Trie object | |
| Args: | |
| word (str): word/string added to Trie | |
| """ | |
| word = self.__format_input(word) #stored in lowercase for consistency | |
| current_node = self.root | |
| word_finished = True | |
| for index, letter in enumerate(word): #check for letters that exist already | |
| if letter in current_node.children: | |
| current_node = current_node.children[letter] | |
| else: | |
| word_finished = False | |
| break | |
| if not word_finished: | |
| #If word wasn't fully entered initially... | |
| for element in word[index:]: | |
| #...for every new letter, create a new child node. (starting from index where the last loop left off.) | |
| current_node.addChild(element) | |
| current_node = current_node.children[element] | |
| current_node.data = word | |
| #Complete word is stored in node data, at the final node of the chain. This is to avoid having to implement a stack to keep track of word-matches during later search. | |
| def has_word(self, word): | |
| """Checks for truthiness of word present as an exact/full match in Trie. | |
| Returns false for empty input. | |
| """ | |
| if word == '': | |
| return False | |
| if word is None: | |
| raise ValueError("Trie.has_word requires a not-Null string") | |
| word = self.__format_input(word) | |
| current_node = self.root #start check at the root node | |
| for letter in word: | |
| if letter in current_node.children: | |
| current_node = current_node.children[letter] | |
| else: | |
| return False | |
| else: #may reconsider this code | |
| if current_node.data is None: | |
| return False | |
| else: | |
| return True | |
| def __get_prefix_node(self, prefix): | |
| """Helper method for getting prefix matches""" | |
| top_node = self.root | |
| print ("prefix in helper is : " + prefix ) | |
| for letter in prefix: | |
| # gets node at end of prefix | |
| if letter in top_node.children: | |
| top_node = top_node[letter] | |
| else: | |
| return None #Prefix not in trie, go no further | |
| else: #successful response | |
| return top_node | |
| def __get_prefix_words(self, search_node, max_result_list_size=None): | |
| word_list = list() | |
| for key, current_node in sorted( search_node.children.items() ): | |
| if current_node.data: | |
| # data is a full word string. Added to results list. | |
| word_list.append(current_node.data) | |
| if max_result_list_size and len(word_list)>=max_result_list_size: | |
| return word_list | |
| if current_node.children: | |
| # if current_node has are children, function recursively visits those. | |
| word_list.extend(self.__get_prefix_words(current_node, max_result_list_size)) | |
| return word_list | |
| def get_prefix_matches(self, prefix, max_result_list_size=None): | |
| """Returns a list of all words in Trie that start with requested prefix""" | |
| prefix = self.__format_input(prefix) | |
| if prefix is None: | |
| raise ValueError('Requires a non-Null prefix value') | |
| top_node = self.__get_prefix_node(prefix) #Expect node or None | |
| if top_node is None or not top_node.children: | |
| #Either no Node or no children to traverse. Result is empty list. | |
| return [] | |
| else: | |
| return (self.__get_prefix_words(top_node)) #Find list of matches; Expects list output | |
| ## UI stuff | |
| class TriePrefixUI: | |
| """A barebones command-line UI for searching Prefixes against the (already initialized) autosuggestion trie.""" | |
| def __init__(self, search_trie=None): | |
| self.session_trie = search_trie | |
| def set_trie(self, search_trie=None): | |
| self.session_trie = search_trie | |
| def ui_loop(self): | |
| """User interaction loop""" | |
| while (True): | |
| print("Type a search term and hit Enter for matching suggestions", | |
| "\nType \'exit\' to end the program.") | |
| raw_input = input("> ") | |
| self.__input_handler( self.__format_input(raw_input) ) | |
| sleep(.25) | |
| def __format_input(self, input_str=str("")): | |
| """Returns a given string, lowercase and sans whitespace.""" | |
| return ( input_str.lower() ).replace(" ", "") | |
| def __input_handler(self, input_str=str("")): | |
| """Input Handler - helper function for what to do with given user commands. | |
| Either quits program or checks for existing trie & searches it. | |
| """ | |
| print("INPUT: "+input_str) | |
| if input_str == ("exit" or "quit"): #'quit' doesn't work :\ | |
| print("\nGoodbye!\n") | |
| exit() | |
| else: | |
| if type(self.session_trie) is type( Trie() ): | |
| for elem in self.session_trie.get_prefix_matches(input_str): | |
| print(str(elem)) | |
| else: | |
| print("Error: Trie is not yet set up.") | |
| exit() | |
| ## End of classes | |
| ## Initialization stuff | |
| def setup_trie(): | |
| """Sets up Trie data structure. Uses a loop to add strings from a list to a Trie and returns the Trie. | |
| This function is primarily for readability, because of the large text block for the list of names loaded into the trie object. | |
| """ | |
| pkmn_trie = Trie() | |
| pkmn_list = ['bulbasaur', 'ivysaur', 'venusaur', 'charmander', 'charmeleon', 'charizard', 'squirtle', 'wartortle', 'blastoise', 'caterpie', 'metapod', 'butterfree', 'weedle', 'kakuna', 'beedrill', 'pidgey', 'pidgeotto', 'pidgeot', 'rattata', 'raticate', 'spearow', 'fearow', 'ekans', 'arbok', 'pikachu', 'raichu', 'sandshrew', 'sandslash', 'nidoran-f', 'nidorina', 'nidoqueen', 'nidoran-m', 'nidorino', 'nidoking', 'clefairy', 'clefable', 'vulpix', 'ninetales', 'jigglypuff', 'wigglytuff', 'zubat', 'golbat', 'oddish', 'gloom', 'vileplume', 'paras', 'parasect', 'venonat', 'venomoth', 'diglett', 'dugtrio', 'meowth', 'persian', 'psyduck', 'golduck', 'mankey', 'primeape', 'growlithe', 'arcanine', 'poliwag', 'poliwhirl', 'poliwrath', 'abra', 'kadabra', 'alakazam', 'machop', 'machoke', 'machamp', 'bellsprout', 'weepinbell', 'victreebel', 'tentacool', 'tentacruel', 'geodude', 'graveler', 'golem', 'ponyta', 'rapidash', 'slowpoke', 'slowbro', 'magnemite', 'magneton', 'farfetchd', 'doduo', 'dodrio', 'seel', 'dewgong', 'grimer', 'muk', 'shellder', 'cloyster', 'gastly', 'haunter', 'gengar', 'onix', 'drowzee', 'hypno', 'krabby', 'kingler', 'voltorb', 'electrode', 'exeggcute', 'exeggutor', 'cubone', 'marowak', 'hitmonlee', 'hitmonchan', 'lickitung', 'koffing', 'weezing', 'rhyhorn', 'rhydon', 'chansey', 'tangela', 'kangaskhan', 'horsea', 'seadra', 'goldeen', 'seaking', 'staryu', 'starmie', 'mr-mime', 'scyther', 'jynx', 'electabuzz', 'magmar', 'pinsir', 'tauros', 'magikarp', 'gyarados', 'lapras', 'ditto', 'eevee', 'vaporeon', 'jolteon', 'flareon', 'porygon', 'omanyte', 'omastar', 'kabuto', 'kabutops', 'aerodactyl', 'snorlax', 'articuno', 'zapdos', 'moltres', 'dratini', 'dragonair', 'dragonite', 'mewtwo', 'mew', 'chikorita', 'bayleef', 'meganium', 'cyndaquil', 'quilava', 'typhlosion', 'totodile', 'croconaw', 'feraligatr', 'sentret', 'furret', 'hoothoot', 'noctowl', 'ledyba', 'ledian', 'spinarak', 'ariados', 'crobat', 'chinchou', 'lanturn', 'pichu', 'cleffa', 'igglybuff', 'togepi', 'togetic', 'natu', 'xatu', 'mareep', 'flaaffy', 'ampharos', 'bellossom', 'marill', 'azumarill', 'sudowoodo', 'politoed', 'hoppip', 'skiploom', 'jumpluff', 'aipom', 'sunkern', 'sunflora', 'yanma', 'wooper', 'quagsire', 'espeon', 'umbreon', 'murkrow', 'slowking', 'misdreavus', 'unown', 'wobbuffet', 'girafarig', 'pineco', 'forretress', 'dunsparce', 'gligar', 'steelix', 'snubbull', 'granbull', 'qwilfish', 'scizor', 'shuckle', 'heracross', 'sneasel', 'teddiursa', 'ursaring', 'slugma', 'magcargo', 'swinub', 'piloswine', 'corsola', 'remoraid', 'octillery', 'delibird', 'mantine', 'skarmory', 'houndour', 'houndoom', 'kingdra', 'phanpy', 'donphan', 'porygon2', 'stantler', 'smeargle', 'tyrogue', 'hitmontop', 'smoochum', 'elekid', 'magby', 'miltank', 'blissey', 'raikou', 'entei', 'suicune', 'larvitar', 'pupitar', 'tyranitar', 'lugia', 'ho-oh', 'celebi'] #First 251 Pokemon | |
| for elem in pkmn_list: | |
| pkmn_trie.add_word(elem) | |
| return pkmn_trie | |
| ## Main execution stuff | |
| def main(): | |
| """Main block for program execution.""" | |
| print("PKMN Autocomplete\n") | |
| pkmn_search_trie = setup_trie() | |
| ui_session = TriePrefixUI(pkmn_search_trie) #init trie for UI | |
| ui_session.ui_loop() | |
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment