Last active
March 25, 2021 23:13
-
-
Save TAG-Epic/fa02975244d967a75fd8fa4c01c5a031 to your computer and use it in GitHub Desktop.
Automatically download pokemon info and artwork
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from typing import Dict, List, Any, Union | |
| # import requests | |
| from bs4 import BeautifulSoup | |
| from shutil import copyfileobj | |
| import os | |
| import json | |
| import pymongo | |
| import sys | |
| from colorama import init, Fore, Style | |
| import signal | |
| import time | |
| import cloudscraper | |
# A cloudscraper session stands in for the plain ``requests`` module (whose
# import is commented out above); every HTTP call below goes through it.
requests = cloudscraper.create_scraper()

# Start timestamp; the elapsed run time is reported at the end of the script.
s = time.time()
def kill_handler(a, b):
    """
    Signal handler: print a blank line and exit with status 0.

    Both arguments are ignored; they exist only to satisfy the signal
    handler calling convention.
    """
    print("\n")
    raise SystemExit(0)
# Maps a pokemondb URL slug to the page title used when building the artwork
# wiki URL (POKEMON_IMAGE_BASE + title). Only slugs whose wiki title differs
# from the slug itself need an entry.
overrides = {
    # "-m" is the male form (♂) and "-f" the female form (♀).
    "nidoran-m": "nidoran♂",
    "nidoran-f": "nidoran♀",
    "farfetchd": "farfetch%27d",
    "mr-mime": "Mr._Mime",
    "ho-oh": "Ho-Oh",
    "mime-jr": "Mime_Jr.",
    "porygon-z": "Porygon-Z",
    "flabebe": "Flabébé",
    "type-null": "Type:_Null",
    # Wiki titles separate words with underscores, not hyphens.
    "tapu-koko": "Tapu_Koko",
    "tapu-lele": "Tapu_Lele",
    "tapu-bulu": "Tapu_Bulu",
    "tapu-fini": "Tapu_Fini",
    "sirfetchd": "Sirfetch%27d",
    "mr-rime": "Mr._Rime",
}
# Replacement values for a pokemon's "spawn_name" field, keyed by the
# hyphenated identifier on the left.
spawn_name_overrides = {
    # Both nidoran variants share one spawn name.
    "nidoran-m": "nidoran",
    "nidoran-f": "nidoran",
    # Spawn names keep the apostrophe.
    "farfetchd": "farfetch'd",
    "sirfetchd": "sirfetch'd",
}
# Route Ctrl+C (SIGINT) through kill_handler and initialise colorama.
signal.signal(signal.SIGINT, kill_handler)
init()

# Passing --production on the command line selects the "proddb" MongoDB host;
# otherwise MongoClient is created with its default arguments.
prod = "--production" in sys.argv
client = pymongo.MongoClient("mongodb://proddb") if prod else pymongo.MongoClient()
db = client["pokebot"]
pokemons_table = db["pokemons"]

# Remote endpoints.
POKEMON_BASE_URL = "https://pokemondb.net"
POKEMON_LIST_URL = POKEMON_BASE_URL + "/pokedex/all"
POKEMON_IMAGE_BASE = "https://pokemon.fandom.com/wiki/"
IS_PROD = None

# Shared mutable state filled in by the scan/download functions.
urls = []  # pokemon detail-page URLs collected by get_pokemon_urls
db_insert = []  # pokemon documents queued for insertion
file_insert = {}
scanned_artworks = []  # URL slugs that were already processed
artworks = {}  # slug -> artwork image URL

# Counters reported in the final summary.
already_in_db = 0
invalid_status_code = 0
def clear_old_images():
    """
    Clears old images in the pokemon dir

    The ``pokemons/`` directory is created when it does not exist yet, so a
    first run does not crash here. Real sub-directories are skipped because
    ``os.remove`` cannot delete them; every other entry is removed.
    """
    image_dir = "pokemons/"
    os.makedirs(image_dir, exist_ok=True)
    for img in os.listdir(image_dir):
        path = os.path.join(image_dir, img)
        # Symlinks are still removed (os.remove deletes the link itself).
        if os.path.isdir(path) and not os.path.islink(path):
            continue
        os.remove(path)
def clear_database():
    """
    Delete every document from the pokemons collection.

    Returns the ``deleted_count`` reported by the delete operation.
    """
    delete_result = pokemons_table.delete_many({})
    return delete_result.deleted_count
def get_pokemon_urls():
    """
    Collect the detail-page URL of every row in the pokedex listing.

    Fetches POKEMON_LIST_URL, walks the rows of the table with id "pokedex"
    and appends POKEMON_BASE_URL + the row's link target to the module-level
    ``urls`` list. On a non-200 response a message is printed and nothing is
    appended.
    """
    response = requests.get(POKEMON_LIST_URL)
    if response.status_code != 200:
        print("Invalid status code on get pokemon urls")
        return

    page = BeautifulSoup(response.text, "html.parser")
    table_body = page.find(id="pokedex", recursive=True).find("tbody", recursive=False)
    for row in table_body.find_all("tr", recursive=False):
        # The link sits directly inside the row's "cell-name" cell.
        name_cell = row.find(class_="cell-name", recursive=False)
        link = name_cell.find("a", recursive=False)
        href = link.get("href")
        urls.append(f"{POKEMON_BASE_URL}{href}")
def scan_pokemon_url(url: str):
    """
    Scrape one pokemon detail page and queue the result for insertion.

    The last path segment of ``url`` (the slug) identifies the pokemon. A
    slug that was already processed only bumps ``already_in_db``; this is
    checked before any network traffic so duplicate listing rows do not cost
    an extra request. Otherwise the page is fetched and parsed, the artwork
    is looked up via ``scan_for_artwork`` and, when that succeeds, the
    pokemon document is appended to ``db_insert``. A failed artwork lookup
    bumps ``invalid_status_code`` instead.

    A non-200 response for the page itself prints a message and returns
    without marking the slug as processed.
    """
    global invalid_status_code, already_in_db

    slug = url.split("/")[-1]
    if slug in scanned_artworks:
        already_in_db += 1
        return

    r = requests.get(url)
    if r.status_code != 200:
        print(f"Invalid status code on scan pokemon url ({url})")
        return

    soup = BeautifulSoup(r.text, "html.parser")
    tables = soup.find_all(class_="vitals-table", recursive=True)

    pokemon = {}
    pokemon["name"] = soup.find("h1", recursive=True).contents[0].lower()
    # spawn_name_overrides is keyed by slug ("nidoran-m", "farfetchd", ...),
    # so the lookup has to use the slug; the display name never matches.
    pokemon["spawn_name"] = spawn_name_overrides.get(slug, pokemon["name"])

    # First vitals table: one row each for id, types, species, height, weight.
    pokedex_rows = tables[0].find("tbody", recursive=False).find_all(recursive=False)
    id_row, type_row, species_row, height_row, weight_row = pokedex_rows[:5]

    pokemon["pokedex-id"] = id_row.find("strong", recursive=True).contents[0]
    pokemon["types"] = [
        a.contents[0].lower() for a in type_row.find_all("a", recursive=True)
    ]
    pokemon["species"] = [
        td.contents[0].lower() for td in species_row.find_all("td", recursive=True)
    ]
    # Keep the first space-separated token and drop its trailing unit
    # characters (2 for height, 3 for weight — presumably " m" / " kg").
    pokemon["height"] = height_row.find("td", recursive=False).contents[0].split(" ")[0][:-2]
    # NOTE(review): the weight value is stored under the key "width". The key
    # is kept as-is because existing readers of the collection may rely on it.
    pokemon["width"] = weight_row.find("td", recursive=False).contents[0].split(" ")[0][:-3]

    # Fourth vitals table: base stats, one row per stat, total in the footer.
    stats_table = tables[3]
    stat_rows = stats_table.find("tbody", recursive=False).find_all("tr", recursive=False)
    stats_footer = stats_table.find("tfoot", recursive=False)

    pokemon_stats = {"total": int(stats_footer.find("b").contents[0])}
    stat_keys = ("hp", "attack", "defense", "special-attack", "special-defense", "speed")
    for index, key in enumerate(stat_keys):
        pokemon_stats[key] = int(stat_rows[index].find(class_="cell-num").contents[0])
    pokemon["base_stats"] = pokemon_stats

    # Rarity tier derived from the base stat total.
    if pokemon_stats["total"] >= 600:
        pokemon["rarity"] = 2
    elif pokemon_stats["total"] >= 500:
        pokemon["rarity"] = 1
    else:
        pokemon["rarity"] = 0

    scanned_artworks.append(slug)
    is_successful, artwork_name = scan_for_artwork(slug)
    pokemon["image"] = artwork_name
    if is_successful:
        db_insert.append(pokemon)
    else:
        invalid_status_code += 1
def scan_for_artwork(name: str):
    """
    Look up the artwork image URL for a pokemon on the wiki.

    ``name`` is translated through ``overrides`` (when present) to build the
    wiki page URL. On success the image ``src`` is stored in ``artworks``
    under the original, untranslated name.

    Returns ``(True, original_name)`` on success and ``(False, None)`` when
    the page request fails or the page has no "pi-image-thumbnail" element.
    """
    display_name = name
    name = overrides.get(name, name)

    response = requests.get(POKEMON_IMAGE_BASE + name)
    if response.status_code != 200:
        print(f"Invalid status code on scan for artwork ({name})")
        return False, None

    page = BeautifulSoup(response.text, "html.parser")
    thumbnail = page.find(class_="pi-image-thumbnail")
    if thumbnail is None:
        print(f"Artwork not found? ({name})")
        return False, None

    artworks[display_name] = thumbnail.get("src")
    return True, display_name
def download_artwork(name, url):
    """
    Stream the image at ``url`` into ``pokemons/<name>.png``.

    Raises RuntimeError when the server does not answer with status 200.
    The output file and the streamed response are always closed, even when
    the copy fails part-way.
    """
    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            raise RuntimeError("Invalid status code. Ratelimited?")
        # Let the raw stream undo any transfer content-encoding while copying.
        r.raw.decode_content = True
        with open(f"pokemons/{name}.png", "wb") as file:
            copyfileobj(r.raw, file)
    finally:
        # A streamed response holds its connection until it is closed.
        r.close()
# ---------------------------------------------------------------------------
# Script flow: wipe old data, scrape every pokemon, download the artwork,
# write the documents to the database, then print a summary.
# ---------------------------------------------------------------------------
print(f"{Fore.YELLOW}Clearing old images...{Style.RESET_ALL}", end="\r")
clear_old_images()
print(f"{Fore.LIGHTGREEN_EX}Cleared old images! {Style.RESET_ALL}")

print(f"{Fore.YELLOW}Clearing old database entries...{Style.RESET_ALL}", end="\r")
db_deletes = clear_database()
print(f"{Fore.LIGHTGREEN_EX}Cleared old database entries! Total: {db_deletes} {Style.RESET_ALL}")

print(f"{Fore.YELLOW}Fetching pokemon urls... {Style.RESET_ALL}", end="\r")
get_pokemon_urls()
print(f"{Fore.LIGHTGREEN_EX}Fetched pokemon urls! {Style.RESET_ALL}")

total_pokemons = len(urls)
for c, url in enumerate(urls, start=1):
    print(f"{Fore.YELLOW}Scanning pokemon ({c}/{total_pokemons}) [{url}]{Style.RESET_ALL}", end="\r")
    scan_pokemon_url(url)
print(
    f"{Fore.LIGHTGREEN_EX}Scanned {total_pokemons} pokemons! {Style.RESET_ALL}")

total_artworks = len(artworks)
for c, (artwork, artwork_url) in enumerate(artworks.items(), start=1):
    print(f"{Fore.YELLOW}Downloading artwork ({c}/{total_artworks}) [{artwork}]{Style.RESET_ALL}", end="\r")
    download_artwork(artwork, artwork_url)
print(f"{Fore.LIGHTGREEN_EX}Downloaded {total_artworks} artworks! {Style.RESET_ALL}")

print(f"{Fore.YELLOW}Writing to the DB... {Style.RESET_ALL}", end="\r")
# insert_many rejects an empty document list, which happens when the listing
# page could not be fetched or every scan failed; skip the write in that case
# so the summary below is still printed.
if db_insert:
    pokemons_table.insert_many(db_insert)
print(f"{Fore.LIGHTGREEN_EX}Wrote {len(db_insert)} entries to the DB! {Style.RESET_ALL}")

e = float(time.time())
print("\n" * 4)
print("Results: ")
print(f"Failed downloads: {invalid_status_code}")
print(f"Already in database: {already_in_db}")
print(f"Took {e - s}s")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment