Skip to content

Instantly share code, notes, and snippets.

@apetenchea
Last active December 29, 2025 04:24
Show Gist options
  • Select an option

  • Save apetenchea/4df556a49f9a2543be877c31355b4164 to your computer and use it in GitHub Desktop.

Select an option

Save apetenchea/4df556a49f9a2543be877c31355b4164 to your computer and use it in GitHub Desktop.
Download any manuals from https://www.manua.ls
# This script gathers all the pages of a manual and merges them into a PDF.
# You'll need to play a bit with inspect-element in order to figure out the correct url format,
# but it should be easy to adapt it to any manual.
# This script is specifically for https://www.manua.ls/audi/q3-2018/manual.
# Their url format is https://www.manua.ls/viewer/{manual-id}/{page-number}/bg{page-number-hex}.png
# Example: https://www.manua.ls/viewer/668006/100/bg64.png
# Enjoy!
import requests
from tqdm import tqdm
from PIL import Image
from io import BytesIO
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
def download_image(url, timeout=30):
    """Download one page image and decode it with Pillow.

    Args:
        url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The opened PIL image, or ``None`` when the request fails, the
        status code is not 200, or the response body is not an image.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to download {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download {url}")
        return None

    try:
        return Image.open(BytesIO(response.content))
    except OSError as exc:
        # PIL.UnidentifiedImageError is an OSError subclass; it is raised
        # when a 200 response carries something other than an image
        # (e.g. an HTML page returned for a wrong base url).
        print(f"Failed to download {url}: {exc}")
        return None
def save_images_as_pdf(images, pdf_filename):
    """Write every image onto its own letter-sized page of a new PDF.

    Each image is scaled, keeping its aspect ratio, to the page width; if
    that makes it taller than the page it is scaled to the page height
    instead. It is drawn at x=0, y=page height minus the drawn height.

    Args:
        images: Iterable of PIL images, one per page.
        pdf_filename: Path of the PDF file to create.
    """
    page_width, page_height = letter
    pdf = canvas.Canvas(pdf_filename, pagesize=letter)

    for picture in images:
        pixel_width, pixel_height = picture.size
        ratio = pixel_width / pixel_height

        # Start from a full-width fit, fall back to a full-height fit
        draw_width, draw_height = page_width, page_width / ratio
        if draw_height > page_height:
            draw_width, draw_height = page_height * ratio, page_height

        # Re-encode the PIL image as PNG into an in-memory stream
        buffer = BytesIO()
        picture.save(buffer, format='PNG')
        buffer.seek(0)

        # Draw the image from that stream, then finish the page
        pdf.drawImage(
            ImageReader(buffer),
            0,
            page_height - draw_height,
            width=draw_width,
            height=draw_height,
        )
        pdf.showPage()

    pdf.save()
def main(
    base_url="https://www.manua.ls/viewer/668006/",
    page_count=230,
    pdf_filename="output.pdf",
):
    """Download every page image of a manual and merge them into one PDF.

    Args:
        base_url: Viewer url of the manual, i.e. everything before the page
            number in https://www.manua.ls/viewer/{manual-id}/{page}/bg{hex}.png.
        page_count: Number of pages to fetch; pages are numbered from 1.
        pdf_filename: Path of the PDF file to write.
    """
    # Tolerate a base url given without its trailing slash
    base_url = base_url.rstrip("/") + "/"

    images = []
    for page in tqdm(range(1, page_count + 1)):
        # The image file name carries the page number in lowercase hex
        url = f"{base_url}{page}/bg{page:x}.png"
        image = download_image(url)
        if image is not None:
            images.append(image)

    if images:
        save_images_as_pdf(images, pdf_filename)
        print("PDF created successfully")
    else:
        print("No images downloaded")


if __name__ == "__main__":
    main()
# Use this script for webp manuals
# example: https://www.manua.ls/growatt/min-3000-11400tl-xh-us/manual?p=1
# pip install selenium webdriver-manager pillow tqdm
# By default Firefox is used, but it's easy to adapt to chrome, see below
"""
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
options = Options()
options.add_argument("--headless")
options.add_argument("--window-size=1200,1600")
driver = webdriver.Chrome(options=options)
"""
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from PIL import Image
from tqdm import tqdm
import io
import time
def get_screenshot(driver, url, consent):
    """Open a manual page and return a screenshot of its viewer element.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Url of the page to open.
        consent: When true, wait up to 3 seconds for the "Consent" button
            and click it; otherwise pause one second so the page's
            JavaScript has time to load its elements.

    Returns:
        A PIL image decoded from the PNG screenshot of the element whose
        id is "viewer".
    """
    # Local import keeps this function self-contained
    from selenium.common.exceptions import WebDriverException

    driver.get(url)

    # Wait for consent and give time for JS to load elements
    if consent:
        try:
            consent_button = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable((By.XPATH, '//button[@aria-label="Consent"]'))
            )
            consent_button.click()
        except WebDriverException:
            # Best effort: the button may never show up or may not be
            # clickable. Only Selenium errors are ignored, so Ctrl+C and
            # programming errors still surface.
            pass
    else:
        time.sleep(1)

    viewer_div = driver.find_element(By.ID, "viewer")

    # Save screenshot of just one element
    png = viewer_div.screenshot_as_png

    # Optional, intermediary save step
    # viewer_div.screenshot(f"{url[-1]}.png")
    return Image.open(io.BytesIO(png))
def main(
    manual_path="/growatt/min-3000-11400tl-xh-us/manual",
    page_count=81,
    pdf_filename="output.pdf",
):
    """Screenshot every page of a manual in Firefox and save them as a PDF.

    Args:
        manual_path: Path of the manual on https://www.manua.ls, without
            the ``?p=`` query; it differs per manual.
        page_count: Number of pages to capture; pages are numbered from 1.
        pdf_filename: Path of the PDF file to write.
    """
    options = Options()
    # Pass the command-line flag: assigning options.headless was reported
    # not to work for Firefox.
    options.add_argument("--headless")
    # NOTE(review): presumably scales rendering to get sharper screenshots
    options.set_preference("layout.css.devPixelsPerPx", "1.5")
    driver = webdriver.Firefox(options=options)

    base_url = "https://www.manua.ls"
    images = []
    try:
        for i in tqdm(range(1, page_count + 1)):
            url = f"{base_url}{manual_path}?p={i}"
            # The consent button is only handled on the first page
            img = get_screenshot(driver, url, consent=(i == 1))
            if img is not None:
                images.append(img)
    finally:
        # Single shutdown point: runs whether or not a page failed
        driver.quit()

    if images:
        images[0].save(pdf_filename, save_all=True, append_images=images[1:])
        print("PDF created successfully")
    else:
        print("No screenshots taken")


if __name__ == "__main__":
    main()
@theegghatching
Copy link

Works beautifully. Thank you.

@gohamstergo
Copy link

gohamstergo commented Jul 22, 2025

it seems they may have gotten more clever recently. the images download fine, but any text elements on the page are now part of separate div classes and so are not downloaded.

an example with lots of text elements: https://www.manua.ls/growatt/min-3000-11400tl-xh-us/manual?p=44

@blakkd
Copy link

blakkd commented Jul 22, 2025

Another trick is to use https://www.manualslib.com/ instead which allows PDF downloading ;)

@gohamstergo
Copy link

they dont have this 81 page manual

@blakkd
Copy link

blakkd commented Jul 22, 2025

Hmm, oh yeah I didn't check for your specific case. I just solved mine 30min ago and wanted to share the tip but unfortunate for you :/

@apetenchea
Copy link
Author

it seems they may have gotten more clever recently. the images download fine, but any text elements on the page are now part of separate div classes and so are not downloaded.

an example with lots of text elements: https://www.manua.ls/growatt/min-3000-11400tl-xh-us/manual?p=44

Apparently they can do that for webp manuals. The good news is that the url keeps increasing as a counter https://www.manua.ls/growatt/min-3000-11400tl-xh-us/manual?p=50, then ?p=51 and so on.
The quick and dirty option is to use a webdriver (e.g. selenium) and take screenshots of the "viewer" element. That would probably take some time, so you might want to grab a coffee or let it run overnight while your computer does the work. I estimate about 30 min for this 80 page manual.

@apetenchea
Copy link
Author

ill look into that, thanks.

I played a bit with https://gist.github.com/apetenchea/4df556a49f9a2543be877c31355b4164#file-webp-manuals-py
It should do the job, if you have the patience to wait for it.

@gohamstergo
Copy link

thanks, ill mess with that

@gohamstergo
Copy link

gohamstergo commented Jul 22, 2025

this is great. i made two changes:

viewer_div = driver.find_element(By.CLASS_NAME, "viewer-page")
doing it this way removes the viewer UI (the arrows still show. i know selenium can hide elements, but this is good enough for what i need)
and
options.add_argument("--headless")
the other headless method works for chrome but not FF.

@pegasusearl
Copy link

pegasusearl commented Dec 24, 2025

how to use? I can't run it.

❯ python manuals.py
Traceback (most recent call last):
  File "D:\Applications\Manua.ls downloader\manuals.py", line 9, in <module>
    from tqdm import tqdm
ModuleNotFoundError: No module named 'tqdm'

UPDATE:
alright I thought I should install package written in the from ... import ... section. I tried to download as much as I can but still didn't work.

❯ python manuals.py
  0%|                                                                                          | 0/230 [00:01<?, ?it/s]
Traceback (most recent call last):
  File "D:\Applications\Manua.ls downloader\manuals.py", line 62, in <module>
    main()
    ~~~~^^
  File "D:\Applications\Manua.ls downloader\manuals.py", line 51, in main
    image = download_image(url)
  File "D:\Applications\Manua.ls downloader\manuals.py", line 19, in download_image
    return Image.open(BytesIO(response.content))
           ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\komi\scoop\apps\python\current\Lib\site-packages\PIL\Image.py", line 3498, in open
    raise UnidentifiedImageError(msg)
PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x000002C3E22E3D80>
❯ pip install BytesIO
ERROR: Could not find a version that satisfies the requirement BytesIO (from versions: none)

[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for BytesIO

@apetenchea
Copy link
Author

how to use? I can't run it.

❯ python manuals.py
Traceback (most recent call last):
  File "D:\Applications\Manua.ls downloader\manuals.py", line 9, in <module>
    from tqdm import tqdm
ModuleNotFoundError: No module named 'tqdm'

UPDATE: alright I thought I should install package written in the from ... import ... section. I tried to download as much as I can but still didn't work.

❯ python manuals.py
  0%|                                                                                          | 0/230 [00:01<?, ?it/s]
Traceback (most recent call last):
  File "D:\Applications\Manua.ls downloader\manuals.py", line 62, in <module>
    main()
    ~~~~^^
  File "D:\Applications\Manua.ls downloader\manuals.py", line 51, in main
    image = download_image(url)
  File "D:\Applications\Manua.ls downloader\manuals.py", line 19, in download_image
    return Image.open(BytesIO(response.content))
           ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\komi\scoop\apps\python\current\Lib\site-packages\PIL\Image.py", line 3498, in open
    raise UnidentifiedImageError(msg)
PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x000002C3E22E3D80>
❯ pip install BytesIO
ERROR: Could not find a version that satisfies the requirement BytesIO (from versions: none)

[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for BytesIO

Put these in a file called requirements.txt:

requests
tqdm
Pillow
reportlab
selenium
webdriver-manager

Run pip install -r requirements.txt, or python -m pip install -r requirements.txt (in case you have multiple python versions).

@pegasusearl
Copy link

pegasusearl commented Dec 24, 2025

Oh wait it failed because I changed the base url to this:

base_url = "https://www.manua.ls/asus/rog-strix-b860-i-gaming-wifi"

I guess I'll have to figure out how to get the base url.
Oh this is only grabbing images, the manuals that I wanted to download have text in it.

@smahm006
Copy link

Webp script worked for me, thanks. I was targeting https://www.manua.ls/honda/life-2010/manual and just had to change the main code to this

base_url = "https://www.manua.ls"
    images = []
    try:
        for i in tqdm(range(1, 248)):
            url = f"{base_url}/honda/life-2010/manual?p={i}"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment