Last active
December 29, 2025 04:24
-
-
Save apetenchea/4df556a49f9a2543be877c31355b4164 to your computer and use it in GitHub Desktop.
Download any manuals from https://www.manua.ls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This script gathers all the pages of a manual and merges them into a PDF. | |
| # You'll need to play a bit with inspect-element in order to figure out the correct url format, | |
| # but it should be easy to adapt it to any manual. | |
| # This script is specifically for https://www.manua.ls/audi/q3-2018/manual. | |
| # Their url format is https://www.manua.ls/viewer/{manual-id}/{page-number}/bg{page-number-hex}.png | |
| # Example: https://www.manua.ls/viewer/668006/100/bg64.png | |
| # Enjoy! | |
| import requests | |
| from tqdm import tqdm | |
| from PIL import Image | |
| from io import BytesIO | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib.pagesizes import letter | |
| from reportlab.lib.utils import ImageReader | |
def download_image(url, timeout=30):
    """Download one page image and decode it with Pillow.

    Args:
        url: Absolute URL of the page image.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded ``PIL.Image.Image``, or ``None`` when the request fails,
        the server answers with a non-200 status, or the payload cannot be
        decoded as an image.
    """
    try:
        # Without a timeout a stalled connection would hang the whole run.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to download {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {url}")
        return None

    try:
        return Image.open(BytesIO(response.content))
    except OSError as exc:
        # A 200 response can still carry a non-image body (e.g. an HTML page
        # when the URL pattern is wrong). Pillow reports that with
        # UnidentifiedImageError, which is an OSError subclass.
        print(f"Failed to decode {url}: {exc}")
        return None
def save_images_as_pdf(images, pdf_filename):
    """Write each image onto its own letter-sized page of a new PDF.

    Every image is scaled, keeping its aspect ratio, to the largest size that
    fits the page, and is drawn against the top-left corner of the page.

    Args:
        images: Iterable of PIL images, one per output page.
        pdf_filename: Path of the PDF file to create.
    """
    page_width, page_height = letter
    pdf = canvas.Canvas(pdf_filename, pagesize=letter)

    for picture in images:
        pixel_width, pixel_height = picture.size
        ratio = pixel_width / pixel_height

        # Fill the page width first; if that overflows vertically, fill the
        # page height instead.
        draw_width, draw_height = page_width, page_width / ratio
        if draw_height > page_height:
            draw_width, draw_height = page_height * ratio, page_height

        # Hand the picture to reportlab as an in-memory PNG stream.
        png_stream = BytesIO()
        picture.save(png_stream, format='PNG')
        png_stream.seek(0)

        # PDF coordinates start at the bottom-left, so the y offset places the
        # top edge of the picture on the top edge of the page.
        pdf.drawImage(
            ImageReader(png_stream),
            0,
            page_height - draw_height,
            width=draw_width,
            height=draw_height,
        )
        pdf.showPage()

    pdf.save()
def main():
    """Download pages 1-230 of manual 668006 and merge them into output.pdf."""
    base_url = "https://www.manua.ls/viewer/668006/"
    pages = []
    for page_number in tqdm(range(1, 231)):  # Adjust the range as needed
        # The image name repeats the page number in lowercase hex,
        # e.g. page 100 -> bg64.png.
        page = download_image(f"{base_url}{page_number}/bg{page_number:x}.png")
        if page:
            pages.append(page)

    if not pages:
        print("No images downloaded")
        return

    save_images_as_pdf(pages, "output.pdf")
    print("PDF created successfully")


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Use this script for webp manuals | |
| # example: https://www.manua.ls/growatt/min-3000-11400tl-xh-us/manual?p=1 | |
| # pip install selenium webdriver-manager pillow tqdm | |
| # By default Firefox is used, but it's easy to adapt to chrome, see below | |
| """ | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.chrome.service import Service | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| options = Options() | |
| options.add_argument("--headless=new") | |
| options.add_argument("--window-size=1200,1600") | |
| driver = webdriver.Chrome(options=options) | |
| """ | |
| from selenium import webdriver | |
| from selenium.webdriver.firefox.options import Options | |
| from selenium.webdriver.firefox.service import Service | |
| from webdriver_manager.firefox import GeckoDriverManager | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| from PIL import Image | |
| from tqdm import tqdm | |
| import io | |
| import time | |
def get_screenshot(driver, url, consent):
    """Load *url* and return a screenshot of the manual viewer element.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the manual page to capture.
        consent: When true, try to click the "Consent" button before taking
            the screenshot; otherwise just pause briefly so the page's
            JavaScript can finish rendering.

    Returns:
        A ``PIL.Image.Image`` decoded from the PNG screenshot of the element
        whose id is ``viewer``.
    """
    # Imported locally so this function does not depend on a new file-level
    # import; selenium is already a dependency of this script.
    from selenium.common.exceptions import WebDriverException

    driver.get(url)

    # Wait for consent and give time for JS to load elements
    if consent:
        try:
            consent_button = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable((By.XPATH, '//button[@aria-label="Consent"]'))
            )
            consent_button.click()
        except WebDriverException:
            # Best effort: the button may never appear (timeout) or may not be
            # clickable. Only Selenium errors are ignored, so Ctrl+C and
            # genuine programming errors still propagate.
            pass
    else:
        time.sleep(1)

    viewer_div = driver.find_element(By.ID, "viewer")

    # Save screenshot of just one element
    png = viewer_div.screenshot_as_png

    # Optional, intermediary save step
    # viewer_div.screenshot(f"{url[-1]}.png")
    return Image.open(io.BytesIO(png))
def main():
    """Screenshot every page of the manual in headless Firefox and save a PDF.

    Pages 1-81 of the hard-coded manual are captured one by one and written
    to ``output.pdf`` in the current directory.
    """
    options = Options()
    # The ``headless`` attribute was removed from Selenium's Options in 4.13;
    # assigning it is then silently ignored and a visible browser opens.
    # Passing the command-line flag works on old and new Selenium alike.
    options.add_argument("-headless")
    options.set_preference("layout.css.devPixelsPerPx", "1.5")
    driver = webdriver.Firefox(options=options)

    base_url = "https://www.manua.ls"
    images = []
    try:
        for i in tqdm(range(1, 82)):  # number of pages 81
            url = f"{base_url}/growatt/min-3000-11400tl-xh-us/manual?p={i}"  # manual name may differ
            # The consent button is only handled on the first page.
            img = get_screenshot(driver, url, consent=(i == 1))
            if img:
                images.append(img)
    finally:
        # Always shut the browser down, even if a page fails. This is the only
        # quit() call needed; a second one after this point would act on an
        # already-closed session.
        driver.quit()

    if images:
        images[0].save("output.pdf", save_all=True, append_images=images[1:])
        print("PDF created successfully")
    else:
        print("No screenshots taken")


if __name__ == "__main__":
    main()
Author
how to use? I can't run it.
❯ python manuals.py Traceback (most recent call last): File "D:\Applications\Manua.ls downloader\manuals.py", line 9, in <module> from tqdm import tqdm ModuleNotFoundError: No module named 'tqdm'UPDATE: alright I thought I should install package written in the
from ... import ...section. I tried to download as much as I can but still didn't work.❯ python manuals.py 0%| | 0/230 [00:01<?, ?it/s] Traceback (most recent call last): File "D:\Applications\Manua.ls downloader\manuals.py", line 62, in <module> main() ~~~~^^ File "D:\Applications\Manua.ls downloader\manuals.py", line 51, in main image = download_image(url) File "D:\Applications\Manua.ls downloader\manuals.py", line 19, in download_image return Image.open(BytesIO(response.content)) ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\komi\scoop\apps\python\current\Lib\site-packages\PIL\Image.py", line 3498, in open raise UnidentifiedImageError(msg) PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x000002C3E22E3D80>❯ pip install BytesIO ERROR: Could not find a version that satisfies the requirement BytesIO (from versions: none) [notice] A new release of pip is available: 25.2 -> 25.3 [notice] To update, run: python.exe -m pip install --upgrade pip ERROR: No matching distribution found for BytesIO
Put these in a file called requirements.txt:
requests
tqdm
Pillow
reportlab
selenium
webdriver-manager
Run pip install -r requirements.txt, or python -m pip install -r requirements.txt (in case you have multiple python versions).
Oh wait it failed because I changed the base url to this:
base_url = "https://www.manua.ls/asus/rog-strix-b860-i-gaming-wifi"I guess I'll have to figure out how to get the base url.
Oh this is only grabbing images, the manuals that I wanted to download have text in it.
Webp script worked for me, thanks. I was targeting https://www.manua.ls/honda/life-2010/manual and just had to change the main code to this
base_url = "https://www.manua.ls"
images = []
try:
for i in tqdm(range(1, 248)):
url = f"{base_url}/honda/life-2010/manual?p={i}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
how to use? I can't run it.
UPDATE:
alright I thought I should install package written in the
from ... import ...section. I tried to download as much as I can but still didn't work.