# pip install requests beautifulsoup4 pillow
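"""Download the page images of a takecomic.jp episode and undo the tile scramble.

Typical invocations (the script filename is whatever you saved this file as):

    python <this_script>.py <comici-viewer-id>
    python <this_script>.py -e <episode-id>             # resolve the viewer id from the episode page
    python <this_script>.py --json contentsInfo.json    # offline: reuse a saved API response

Each run writes into a new timestamp-named folder in the current directory:
the raw API response as data.json, the downloaded pages, descrambled copies
(*_descrambled.*), and the scrambled originals under original/.
"""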

import argparse
import json
import os
import shutil
import time
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

try:
    from PIL import Image
except Exception:
    Image = None


# ========== viewer-id ==========
def get_viewer_id(episode_id=None):
    if episode_id is None:
        episode_id = input("episode ID: ").strip()
    url = f"https://takecomic.jp/episodes/{episode_id}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
    }
    resp = requests.get(url, headers=headers, timeout=15)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    el = soup.find(attrs={"data-comici-viewer-id": True})
    if not el:
        raise RuntimeError("data-comici-viewer-id not found")
    return el["data-comici-viewer-id"]


# ========== JSON helpers ==========
def find_total_pages(obj):
    # Walk the nested contentsInfo response until a 'totalPages' value turns up.
    if isinstance(obj, dict):
        if "totalPages" in obj:
            return obj["totalPages"]
        for v in obj.values():
            r = find_total_pages(v)
            if r is not None:
                return r
    elif isinstance(obj, list):
        for i in obj:
            r = find_total_pages(i)
            if r is not None:
                return r
    return None


def fetch_json(url):
    resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=20)
    resp.raise_for_status()
    return resp.json()


# ========== descramble ==========
def _parse_scramble(scramble_str):
    if not scramble_str:
        return None
    s = ''.join(scramble_str.split())
    if s.startswith('[') and s.endswith(']'):
        s = s[1:-1]
    return [int(x) for x in s.split(',')] if s else []
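

# "scramble" (from contentsInfo) is a permutation of the br*pr tile indices:
# entry f names which tile of the scrambled image (counted column by column,
# top to bottom) belongs at position f of the restored image. The viewer
# appears to use a 4x4 grid, hence the br=4 / pr=4 defaults below.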
def descramble_image(src_path, scramble_str, br=4, pr=4):
    if Image is None:
        raise RuntimeError("pip install pillow")
    scramble = _parse_scramble(scramble_str)
    with Image.open(src_path) as im:
        w, h = im.size
        tw = w // br  # tile width
        th = h // pr  # tile height
        xr = [[c, r] for c in range(br) for r in range(pr)]
        v = [xr[i] for i in scramble]
        out = Image.new(im.mode, (w, h))
        f = 0
        for p in range(br):
            for q in range(pr):
                sc, sr = v[f]
                box = (sc * tw, sr * th, sc * tw + tw, sr * th + th)
                crop = im.crop(box)
                out.paste(crop, (p * tw, q * th))
                f += 1
    base, ext = os.path.splitext(src_path)
    out_path = base + "_descrambled" + ext
    out.save(out_path)
    # move original
    orig_dir = os.path.join(os.path.dirname(src_path), "original")
    os.makedirs(orig_dir, exist_ok=True)
    shutil.move(src_path, os.path.join(orig_dir, os.path.basename(src_path)))
    return out_path
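

# Rough shape of the contentsInfo response this script relies on (inferred from
# the fields read below; other keys are ignored):
#
#   {
#     ...,
#     "totalPages": <int>,          # may be nested; found recursively
#     "result": [
#       {"imageUrl": "https://...", "scramble": "[12,3,0,...]", ...},
#       ...
#     ]
#   }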


# ========== main ==========
def main():
    p = argparse.ArgumentParser()
    p.add_argument("viewer_id", nargs="?", help="viewer-id")
    p.add_argument("--json", help="local contentsInfo.json path")
    p.add_argument("-e", "--episode", help="episode id")
    args = p.parse_args()

    viewer_id = args.viewer_id
    full = None
    out_dir = None
    base_api = "https://takecomic.jp/api/book/contentsInfo"

    # If user provided a local JSON, use it directly
    if args.json:
        local_json = args.json
        if not os.path.isfile(local_json):
            raise FileNotFoundError(f"Local JSON file not found: {local_json}")
        with open(local_json, "r", encoding="utf-8") as f:
            full = json.load(f)
        # create out dir and copy JSON there
        ts = str(int(time.time()))
        out_dir = os.path.join(os.getcwd(), ts)
        os.makedirs(out_dir, exist_ok=True)
        shutil.copy(local_json, os.path.join(out_dir, "data.json"))
        results = full.get("result", [])
        print(f"Using local JSON, processing {len(results)} images")
    else:
        viewer_id = args.viewer_id
        if not viewer_id:
            # try to obtain viewer id from episode page; if that fails, fall back to prompt for local JSON
            try:
                viewer_id = get_viewer_id(args.episode)
            except RuntimeError as e:
                if "data-comici-viewer-id not found" in str(e):
                    local_json = input("viewer requires auth — local contentsInfo.json path: ").strip()
                    if not os.path.isfile(local_json):
                        raise FileNotFoundError(f"Local JSON file not found: {local_json}")
                    with open(local_json, "r", encoding="utf-8") as f:
                        full = json.load(f)
                    # create out dir and copy JSON there
                    ts = str(int(time.time()))
                    out_dir = os.path.join(os.getcwd(), ts)
                    os.makedirs(out_dir, exist_ok=True)
                    shutil.copy(local_json, os.path.join(out_dir, "data.json"))
                    results = full.get("result", [])
                    print(f"Using local JSON, processing {len(results)} images")
                else:
                    raise

        if viewer_id and full is None:
            # normal online flow: fetch pages from API
            first = fetch_json(f"{base_api}?comici-viewer-id={viewer_id}&page-from=0&page-to=0")
            total = int(find_total_pages(first)) - 1
            full = fetch_json(
                f"{base_api}?comici-viewer-id={viewer_id}&page-from=0&page-to={total}"
            )
            # create out dir and save fetched JSON
            ts = str(int(time.time()))
            out_dir = os.path.join(os.getcwd(), ts)
            os.makedirs(out_dir, exist_ok=True)
            with open(os.path.join(out_dir, "data.json"), "w", encoding="utf-8") as f:
                json.dump(full, f, ensure_ascii=False, indent=2)
            results = full.get("result", [])
            print(f"Downloaded and processing {len(results)} images")
    img_headers = {
        "accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "dnt": "1",
        "origin": "https://takecomic.jp",
        "referer": "https://takecomic.jp/",
        "sec-fetch-dest": "image",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    }

    for idx, item in enumerate(results):
        url = item.get("imageUrl")
        scramble = item.get("scramble")
        if not url:
            continue
        name = os.path.basename(urlparse(url).path)
        dst = os.path.join(out_dir, name)
        with requests.get(url, headers=img_headers, stream=True) as r:
            r.raise_for_status()
            with open(dst, "wb") as f:
                for c in r.iter_content(8192):
                    f.write(c)
        print(f"DOWNLOADED: {name}")
        if scramble:
            try:
                outp = descramble_image(dst, scramble)
                print(f"DESCRAMBLED: {outp}")
            except Exception as e:
                print(f"SCRAMBLE ERROR {name}: {e}")


if __name__ == "__main__":
    main()