# pip install requests beautifulsoup4 pillow
import argparse
import json
import os
import time
import shutil
import requests
from urllib.parse import urlparse
from bs4 import BeautifulSoup
try:
    from PIL import Image
except Exception:
    Image = None
# ========== viewer-id ==========
def get_viewer_id(episode_id=None):
    """Resolve the comici viewer id embedded in an episode page."""
    if episode_id is None:
        episode_id = input("episodes ID: ").strip()
    url = f"https://takecomic.jp/episodes/{episode_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
    }
    resp = requests.get(url, headers=headers, timeout=15)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    el = soup.find(attrs={"data-comici-viewer-id": True})
    if not el:
        raise RuntimeError("data-comici-viewer-id not found")
    return el["data-comici-viewer-id"]
# ========== JSON helpers ==========
def find_total_pages(obj):
    """Recursively search a JSON structure for a "totalPages" value."""
    if isinstance(obj, dict):
        if "totalPages" in obj:
            return obj["totalPages"]
        for v in obj.values():
            r = find_total_pages(v)
            if r is not None:
                return r
    elif isinstance(obj, list):
        for i in obj:
            r = find_total_pages(i)
            if r is not None:
                return r
    return None
def fetch_json(url):
    resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=20)
    resp.raise_for_status()
    return resp.json()
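# Note on the payload shape (inferred from how this script consumes it below, not from
# any official schema): the contentsInfo response is expected to contain a "totalPages"
# count somewhere in the tree, plus a top-level "result" list whose items carry
# "imageUrl" and "scramble" fields, roughly:
#   {"totalPages": 30, "result": [{"imageUrl": "https://.../0001.jpg", "scramble": "[3,1,0,2,...]"}]}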
# ========== descramble ==========
def _parse_scramble(scramble_str):
    """Parse a scramble string like "[3,1,0,2,...]" into a list of tile indices."""
    if not scramble_str:
        return None
    s = ''.join(scramble_str.split())
    if s.startswith('[') and s.endswith(']'):
        s = s[1:-1]
    return [int(x) for x in s.split(',')] if s else []
def descramble_image(src_path, scramble_str, br=4, pr=4):
    """Reassemble a scrambled image (br x pr tile grid) and move the original aside."""
    if Image is None:
        raise RuntimeError("pip install pillow")
    scramble = _parse_scramble(scramble_str)
    with Image.open(src_path) as im:
        w, h = im.size
        tw = w // br
        th = h // pr
        # source tile positions as [column, row] pairs, permuted by the scramble list
        xr = [[c, r] for c in range(br) for r in range(pr)]
        v = [xr[i] for i in scramble]
        out = Image.new(im.mode, (w, h))
        f = 0
        for p in range(br):
            for q in range(pr):
                sc, sr = v[f]
                box = (sc * tw, sr * th, sc * tw + tw, sr * th + th)
                crop = im.crop(box)
                out.paste(crop, (p * tw, q * th))
                f += 1
    base, ext = os.path.splitext(src_path)
    out_path = base + "_descrambled" + ext
    out.save(out_path)
    # move the scrambled original into an "original" subfolder next to the output
    orig_dir = os.path.join(os.path.dirname(src_path), "original")
    os.makedirs(orig_dir, exist_ok=True)
    shutil.move(src_path, os.path.join(orig_dir, os.path.basename(src_path)))
    return out_path
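# Worked example (illustrative values; the real scramble comes from contentsInfo.json):
# with the default 4x4 grid, xr enumerates source tiles as [col, row] pairs
# ([0,0], [0,1], ..., [3,3]) and the output is filled in that same order, so a
# scramble string of "[0,1,2,...,15]" (the identity permutation) would leave the
# image unchanged, while "[15,14,...,0]" would reverse the tile order.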
# ========== main ==========
def main():
    p = argparse.ArgumentParser()
    p.add_argument("viewer_id", nargs="?", help="viewer-id")
    p.add_argument("--json", help="local contentsInfo.json path")
    p.add_argument("-e", "--episode", help="episode id")
    args = p.parse_args()
    viewer_id = args.viewer_id
    full = None
    out_dir = None
    base_api = "https://takecomic.jp/api/book/contentsInfo"
    # If the user provided a local JSON, use it directly
    if args.json:
        local_json = args.json
        if not os.path.isfile(local_json):
            raise FileNotFoundError(f"Local JSON file not found: {local_json}")
        with open(local_json, "r", encoding="utf-8") as f:
            full = json.load(f)
        # create the output dir and copy the JSON there
        ts = str(int(time.time()))
        out_dir = os.path.join(os.getcwd(), ts)
        os.makedirs(out_dir, exist_ok=True)
        shutil.copy(local_json, os.path.join(out_dir, "data.json"))
        results = full.get("result", [])
        print(f"Using local JSON, processing {len(results)} images")
    else:
        if not viewer_id:
            # try to obtain the viewer id from the episode page; if that fails,
            # fall back to prompting for a local JSON
            try:
                viewer_id = get_viewer_id(args.episode)
            except RuntimeError as e:
                if "data-comici-viewer-id not found" in str(e):
                    local_json = input("viewer requires auth — local contentsInfo.json path: ").strip()
                    if not os.path.isfile(local_json):
                        raise FileNotFoundError(f"Local JSON file not found: {local_json}")
                    with open(local_json, "r", encoding="utf-8") as f:
                        full = json.load(f)
                    # create the output dir and copy the JSON there
                    ts = str(int(time.time()))
                    out_dir = os.path.join(os.getcwd(), ts)
                    os.makedirs(out_dir, exist_ok=True)
                    shutil.copy(local_json, os.path.join(out_dir, "data.json"))
                    results = full.get("result", [])
                    print(f"Using local JSON, processing {len(results)} images")
                else:
                    raise
    if viewer_id and full is None:
        # normal online flow: fetch all pages from the API
        first = fetch_json(f"{base_api}?comici-viewer-id={viewer_id}&page-from=0&page-to=0")
        total_pages = find_total_pages(first)
        if total_pages is None:
            raise RuntimeError("totalPages not found in contentsInfo response")
        total = int(total_pages) - 1
        full = fetch_json(
            f"{base_api}?comici-viewer-id={viewer_id}&page-from=0&page-to={total}"
        )
        # create the output dir and save the fetched JSON
        ts = str(int(time.time()))
        out_dir = os.path.join(os.getcwd(), ts)
        os.makedirs(out_dir, exist_ok=True)
        with open(os.path.join(out_dir, "data.json"), "w", encoding="utf-8") as f:
            json.dump(full, f, ensure_ascii=False, indent=2)
        results = full.get("result", [])
        print(f"Downloaded and processing {len(results)} images")
    img_headers = {
        "accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "dnt": "1",
        "origin": "https://takecomic.jp",
        "referer": "https://takecomic.jp/",
        "sec-fetch-dest": "image",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    }
    for idx, item in enumerate(results):
        url = item.get("imageUrl")
        scramble = item.get("scramble")
        if not url:
            continue
        name = os.path.basename(urlparse(url).path)
        dst = os.path.join(out_dir, name)
        with requests.get(url, headers=img_headers, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(dst, "wb") as f:
                for c in r.iter_content(8192):
                    f.write(c)
        print(f"DOWNLOADED: {name}")
        if scramble:
            try:
                outp = descramble_image(dst, scramble)
                print(f"DESCRAMBLED: {outp}")
            except Exception as e:
                print(f"SCRAMBLE ERROR {name}: {e}")
if __name__ == "__main__":
    main()
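# Usage sketch (the file name "takecomic_dl.py" is just a placeholder for however
# you saved this gist):
#   python takecomic_dl.py <viewer-id>        # download with a known comici viewer id
#   python takecomic_dl.py -e <episode-id>    # resolve the viewer id from the episode page
#   python takecomic_dl.py --json data.json   # reuse a locally saved contentsInfo.json
# Output goes into a timestamp-named directory under the current working directory;
# images that needed descrambling get a "_descrambled" copy, with the scrambled
# original moved into an "original/" subfolder.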