Skip to content

Instantly share code, notes, and snippets.

@Prince-of-sea
Forked from PC-CNT/PS1-scraping.py
Last active April 28, 2022 09:07
Show Gist options
  • Select an option

  • Save Prince-of-sea/3de66033cc734b60f02e99d2ff0d5535 to your computer and use it in GitHub Desktop.

Select an option

Save Prince-of-sea/3de66033cc734b60f02e99d2ff0d5535 to your computer and use it in GitHub Desktop.
パケ画像を自動で全部落とすやつ (https://psxdatacenter.com/ntsc-j_list.html)
from bs4 import BeautifulSoup
import requests
import urllib
import re
import os
import unicodedata
def _path_executable_kai(moji: str) -> str:
moji = moji.replace("/", "/")
moji = moji.replace("\\", "\")
moji = moji.replace("?", "?")
moji = moji.replace("\n", "_")
moji = moji.replace(":", ":")
moji = moji.replace("\"", "”")
moji = moji.replace("<", "<")
moji = moji.replace(">", ">")
moji = moji.replace("|", "|")
moji = moji.replace("*", "*")
moji = moji.replace("\r", "")
moji = moji.replace("\t", "")
moji = moji.replace("\xa0", "")
return moji
def main():
    """Download the cover image for every game row of the NTSC-J list.

    Fetches ``https://psxdatacenter.com/jlist.html``, walks every row of each
    ``sectiontable`` table, and for each row that has a game link saves the
    matching cover from ``/images/covers/`` as
    ``<image stem>__(<sanitised col3 text>).jpg``. When a cover request does
    not return HTTP 200, the site's ``none.jpg`` placeholder is written under
    that name instead.

    Side effects:
        Creates the directory ``PS1_covers`` if needed, changes the process
        working directory into it, writes one file per game row, and prints
        each image URL and file name.

    Raises:
        requests.RequestException: on network failure, timeout, or an error
            status for the index page.
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import urljoin

    timeout = 30  # seconds; without it a stalled connection blocks forever

    # NOTE: requests does not appear to follow <frame> content, so the frame
    # document jlist.html is requested directly rather than ntsc-j_list.html.
    root_url = "https://psxdatacenter.com/jlist.html"
    root_image = "https://psxdatacenter.com/images/covers/"
    placeholder_url = "https://psxdatacenter.com/images/covers/none.jpg"

    with requests.Session() as session:
        root_page = session.get(root_url, timeout=timeout)
        # Fail loudly instead of silently parsing an error page into nothing.
        root_page.raise_for_status()
        soup = BeautifulSoup(root_page.text, "html.parser")

        os.makedirs("PS1_covers", exist_ok=True)
        os.chdir("PS1_covers")

        # Fetched at most once, on the first missing cover, then reused.
        placeholder = None

        for table in soup.select("table[class='sectiontable']"):
            for tr in table.select("tr"):
                link = tr.select_one("a[target='jlist']")
                title_cell = tr.select_one("td[class='col3']")
                # Skip rows that are not complete game entries.
                if link is None or title_cell is None:
                    continue
                href = link.get("href")
                if not href:
                    continue

                # The game page path with "games/" removed and the trailing
                # "html" swapped for "jpg" is the cover's path under
                # root_image.
                relative = re.sub(r"html$", "jpg", re.sub(r"games/", "", href))
                img_url = urljoin(root_image, relative)

                title = unicodedata.normalize("NFKD", title_cell.get_text())
                title = re.sub("^ ", "", _path_executable_kai(title))
                stem = img_url.split("/")[-1][:-4]  # drop the ".jpg" suffix
                img_name = "{}__({}).jpg".format(stem, title)

                print(img_url)
                print(img_name)

                response = session.get(img_url, timeout=timeout)
                if response.status_code == 200:
                    data = response.content
                else:
                    if placeholder is None:
                        placeholder = session.get(
                            placeholder_url, timeout=timeout
                        ).content
                    data = placeholder

                with open(img_name, "wb") as f:
                    f.write(data)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment