Skip to content

Instantly share code, notes, and snippets.

@TAG-Epic
Last active March 25, 2021 23:13
Show Gist options
  • Select an option

  • Save TAG-Epic/fa02975244d967a75fd8fa4c01c5a031 to your computer and use it in GitHub Desktop.

Select an option

Save TAG-Epic/fa02975244d967a75fd8fa4c01c5a031 to your computer and use it in GitHub Desktop.
Automatically download pokemon info and artwork
import os
from typing import Dict, List, Any, Union
# import requests
from bs4 import BeautifulSoup
from shutil import copyfileobj
import os
import json
import pymongo
import sys
from colorama import init, Fore, Style
import signal
import time
import cloudscraper
# Stand-in for the `requests` module: the scraper object is bound to that name
# so the rest of the script can keep calling `requests.get(...)`.
requests = cloudscraper.create_scraper()

# Start timestamp; the summary at the end of the script prints the elapsed time.
s = time.time()
def kill_handler(a, b):
    """
    Signal handler that ends the script with exit status 0.

    Intended to be registered through signal.signal(), which calls the
    handler with the signal number and the current stack frame.

    :param a: Signal number (unused).
    :param b: Interrupted stack frame (unused).
    :raises SystemExit: Always, with code 0.
    """
    # Progress lines in this script are printed with end="\r"; emit newlines
    # first so the shell prompt does not land on top of a half-written line.
    print("\n")
    sys.exit(0)
# Artwork page names for pokemon whose wiki page title cannot be derived from
# the key passed to scan_for_artwork. The value is appended verbatim to
# POKEMON_IMAGE_BASE, so it must already be in URL form (underscores for
# spaces, %27 for an apostrophe).
overrides = {
    # "-m" is the male form (♂) and "-f" the female form (♀).
    "nidoran-m": "nidoran♂",
    "nidoran-f": "nidoran♀",
    "farfetchd": "farfetch%27d",
    "mr-mime": "Mr._Mime",
    "ho-oh": "Ho-Oh",
    "mime-jr": "Mime_Jr.",
    "porygon-z": "Porygon-Z",
    "flabebe": "Flabébé",
    "type-null": "Type:_Null",
    # Underscores throughout, matching the other Tapu entries.
    # NOTE(review): assumes all four titles are space-separated; confirm.
    "tapu-koko": "Tapu_Koko",
    "tapu-lele": "Tapu_Lele",
    "tapu-bulu": "Tapu_Bulu",
    "tapu-fini": "Tapu_Fini",
    "sirfetchd": "Sirfetch%27d",
    "mr-rime": "Mr._Rime",
}
# Replacement values for the "spawn_name" field of a scraped pokemon.
# scan_pokemon_url consults this mapping; a pokemon without an entry keeps
# its scraped name as its spawn name.
spawn_name_overrides = {
    # Both Nidoran forms share one spawn name.
    "nidoran-m": "nidoran",
    "nidoran-f": "nidoran",
    # Spawn names that carry an apostrophe.
    "farfetchd": "farfetch'd",
    "sirfetchd": "sirfetch'd",
}
# Route SIGINT to kill_handler.
signal.signal(signal.SIGINT, kill_handler)
# Initialise colorama before any coloured output is printed.
init()

# Passing --production on the command line targets the "proddb" MongoDB host;
# otherwise MongoClient() is created with its default connection settings.
prod = "--production" in sys.argv
client = pymongo.MongoClient("mongodb://proddb") if prod else pymongo.MongoClient()
db = client.pokebot
pokemons_table = db.pokemons

# Scrape sources: pokemondb for the data, the fandom wiki for the artwork.
POKEMON_BASE_URL = "https://pokemondb.net"
POKEMON_LIST_URL = POKEMON_BASE_URL + "/pokedex/all"
POKEMON_IMAGE_BASE = "https://pokemon.fandom.com/wiki/"
IS_PROD = None

# Shared state filled in by the scraping functions.
urls = []               # pokemon page URLs collected by get_pokemon_urls
db_insert = []          # pokemon records waiting to be written to MongoDB
file_insert = {}
scanned_artworks = []   # slugs whose artwork lookup has already been attempted
artworks = {}           # artwork name -> image URL, filled by scan_for_artwork

# Counters reported in the final summary.
already_in_db = 0
invalid_status_code = 0
def clear_old_images():
    """
    Clears old images in the pokemon dir

    The "pokemons" directory is created when it does not exist yet, so a
    first run has nothing to clear but still ends with a directory that
    download_artwork can write into.
    """
    image_dir = "pokemons"
    # Without this, os.listdir raises FileNotFoundError on a fresh checkout.
    os.makedirs(image_dir, exist_ok=True)
    for img in os.listdir(image_dir):
        os.remove(os.path.join(image_dir, img))
def clear_database():
    """
    Clears database

    Deletes every document in pokemons_table and returns how many were
    removed.
    """
    # An empty filter matches every document in the collection.
    delete_result = pokemons_table.delete_many({})
    return delete_result.deleted_count
def get_pokemon_urls():
    """
    Collects the page URL of every row in the pokedex list.

    Fetches POKEMON_LIST_URL, walks the rows of the element with id
    "pokedex" and appends one absolute URL per row to the module-level
    `urls` list. If the list page does not answer with status 200 a message
    is printed and `urls` is left untouched.
    """
    response = requests.get(POKEMON_LIST_URL)
    if response.status_code != 200:
        print("Invalid status code on get pokemon urls")
        return

    document = BeautifulSoup(response.text, "html.parser")
    rows = (
        document.find(id="pokedex", recursive=True)
        .find("tbody", recursive=False)
        .find_all("tr", recursive=False)
    )
    for row in rows:
        # The link sits directly inside the row's "cell-name" cell; its href
        # is appended to the site root.
        name_cell = row.find(class_="cell-name", recursive=False)
        link = name_cell.find("a", recursive=False)
        urls.append(f"{POKEMON_BASE_URL}{link.get('href')}")
def scan_pokemon_url(url: str):
    """
    Scrapes one pokemon page and queues the pokemon for insertion.

    Reads the name, pokedex data and base stats from the page's
    "vitals-table" tables, derives a rarity tier from the stat total and asks
    scan_for_artwork for the image. On success the record is appended to
    db_insert. If the artwork lookup fails the record is dropped and
    invalid_status_code is incremented. A URL whose slug has already been
    scanned only increments already_in_db.

    :param url: Full page URL; its last path segment is used as the slug.
    """
    global invalid_status_code, already_in_db

    slug = url.split("/")[-1]

    # Skip a repeated URL before fetching, so it costs no request or parse.
    if slug in scanned_artworks:
        already_in_db += 1
        return

    r = requests.get(url)
    if r.status_code != 200:
        print(f"Invalid status code on scan pokemon url ({url})")
        return
    soup = BeautifulSoup(r.text, "html.parser")
    tables = soup.find_all(class_="vitals-table", recursive=True)

    pokemon = {}
    pokemon["name"] = soup.find("h1", recursive=True).contents[0].lower()
    # spawn_name_overrides is keyed by slug ("farfetchd", "nidoran-m"), so it
    # has to be looked up with the slug, not with the scraped display name.
    pokemon["spawn_name"] = spawn_name_overrides.get(slug, pokemon["name"])

    # First vitals table: pokedex data, one row per field in a fixed order.
    pokedex_rows = tables[0].find("tbody", recursive=False).find_all(recursive=False)
    id_row = pokedex_rows[0]
    type_row = pokedex_rows[1]
    species_row = pokedex_rows[2]
    height_row = pokedex_rows[3]
    weight_row = pokedex_rows[4]

    pokemon["pokedex-id"] = id_row.find("strong", recursive=True).contents[0]
    pokemon["types"] = [a.contents[0].lower() for a in type_row.find_all("a", recursive=True)]
    pokemon["species"] = [td.contents[0].lower() for td in species_row.find_all("td", recursive=True)]
    # Keep the first space-separated token and cut its last two characters
    # (presumably a separator plus the "m" unit; confirm against live markup).
    pokemon["height"] = height_row.find("td", recursive=False).contents[0].split(" ")[0][:-2]
    # Same idea with three trailing characters cut.
    # NOTE(review): this is the weight row stored under the key "width"; the
    # key is left as-is because readers of the collection may depend on it.
    pokemon["width"] = weight_row.find("td", recursive=False).contents[0].split(" ")[0][:-3]

    # Fourth vitals table: base stats, one row per stat plus a footer total.
    stats_table = tables[3]
    stat_rows = stats_table.find("tbody", recursive=False).find_all("tr", recursive=False)
    stats_footer = stats_table.find("tfoot", recursive=False)

    pokemon_stats = {"total": int(stats_footer.find("b").contents[0])}
    stat_keys = ("hp", "attack", "defense", "special-attack", "special-defense", "speed")
    for index, key in enumerate(stat_keys):
        # Indexing (not zip) so a short table still fails loudly.
        pokemon_stats[key] = int(stat_rows[index].find(class_="cell-num").contents[0])
    pokemon["base_stats"] = pokemon_stats

    # Rarity tiers by base stat total: 600+ -> 2, 500-599 -> 1, below -> 0.
    if pokemon_stats["total"] >= 600:
        pokemon["rarity"] = 2
    elif pokemon_stats["total"] >= 500:
        pokemon["rarity"] = 1
    else:
        pokemon["rarity"] = 0

    scanned_artworks.append(slug)
    is_successful, artwork_name = scan_for_artwork(slug)
    pokemon["image"] = artwork_name
    if is_successful:
        db_insert.append(pokemon)
    else:
        invalid_status_code += 1
def scan_for_artwork(name: str):
    """
    Finds the artwork URL for one pokemon on the image wiki.

    The wiki page name is `name` unless `overrides` has an entry for it. On
    success the image URL is stored in `artworks` under the original `name`.

    :param name: Key identifying the pokemon (also used as the artwork name).
    :return: (True, name) when artwork was found, otherwise (False, None).
    """
    page_name = overrides.get(name, name)

    response = requests.get(POKEMON_IMAGE_BASE + page_name)
    if response.status_code != 200:
        print(f"Invalid status code on scan for artwork ({page_name})")
        return False, None

    document = BeautifulSoup(response.text, "html.parser")
    thumbnail = document.find(class_="pi-image-thumbnail")
    if thumbnail is None:
        print(f"Artwork not found? ({page_name})")
        return False, None

    artworks[name] = thumbnail.get("src")
    return True, name
def download_artwork(name, url):
    """
    Downloads one artwork image to pokemons/<name>.png.

    :param name: File name (without extension) to save the image under.
    :param url: Image URL to download.
    :raises RuntimeError: If the server does not answer with status 200.
    """
    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            raise RuntimeError("Invalid status code. Ratelimited?")
        # Ask the raw stream to undo any content encoding so the bytes copied
        # below are the image itself.
        r.raw.decode_content = True
        # `with` closes the file even if the copy fails part-way.
        # NOTE(review): always saved as .png, whatever format the server sent.
        with open(f"pokemons/{name}.png", "wb") as image_file:
            copyfileobj(r.raw, image_file)
    finally:
        # Close the streamed response on success and on every error path.
        r.close()
# Script body: wipe old state, scrape everything, download the artwork, then
# write the collected records to MongoDB and print a summary.
# NOTE(review): images and database entries are cleared before scraping
# starts, so a failure later in the run leaves both empty.
print(f"{Fore.YELLOW}Clearing old images...{Style.RESET_ALL}", end="\r")
clear_old_images()
print(f"{Fore.LIGHTGREEN_EX}Cleared old images! {Style.RESET_ALL}")

print(f"{Fore.YELLOW}Clearing old database entries...{Style.RESET_ALL}", end="\r")
db_deletes = clear_database()
print(f"{Fore.LIGHTGREEN_EX}Cleared old database entries! Total: {db_deletes} {Style.RESET_ALL}")

print(f"{Fore.YELLOW}Fetching pokemon urls... {Style.RESET_ALL}", end="\r")
get_pokemon_urls()
print(f"{Fore.LIGHTGREEN_EX}Fetched pokemon urls! {Style.RESET_ALL}")

total_pokemons = len(urls)
for c, url in enumerate(urls, start=1):
    print(f"{Fore.YELLOW}Scanning pokemon ({c}/{total_pokemons}) [{url}]{Style.RESET_ALL}", end="\r")
    scan_pokemon_url(url)
print(
    f"{Fore.LIGHTGREEN_EX}Scanned {total_pokemons} pokemons! {Style.RESET_ALL}")

total_artworks = len(artworks)
for c, (artwork, artwork_url) in enumerate(artworks.items(), start=1):
    print(f"{Fore.YELLOW}Downloading artwork ({c}/{total_artworks}) [{artwork}]{Style.RESET_ALL}", end="\r")
    download_artwork(artwork, artwork_url)
print(f"{Fore.LIGHTGREEN_EX}Downloaded {total_artworks} artworks! {Style.RESET_ALL}")

print(f"{Fore.YELLOW}Writing to the DB... {Style.RESET_ALL}", end="\r")
# insert_many rejects an empty list; skip the call when nothing was scraped
# (for example when the list page could not be fetched).
if db_insert:
    pokemons_table.insert_many(db_insert)
print(f"{Fore.LIGHTGREEN_EX}Wrote {len(db_insert)} entries to the DB! {Style.RESET_ALL}")

e = time.time()
print("\n" * 4)
print("Results: ")
print(f"Failed downloads: {invalid_status_code}")
print(f"Already in database: {already_in_db}")
print(f"Took {e - s}s")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment