Created
December 11, 2025 10:09
-
-
Save BexTuychiev/372f74c3e5f7f2446d823728deed7af5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import asyncio | |
| from pyppeteer import launch | |
| import json | |
async def _text_of(page, element):
    """Return the ``textContent`` of *element*, evaluated in *page*'s browser context."""
    return await page.evaluate("el => el.textContent", element)


async def _extract_quote(page, quote):
    """Build the ``{"text", "author", "tags"}`` dict for one ``.quote`` element."""
    text = await _text_of(page, await quote.querySelector(".text"))
    # Drop surrounding whitespace, then the curly double quotes (U+201C / U+201D)
    # that wrap the quote text.
    text = text.strip().strip("\u201c\u201d")

    author = await _text_of(page, await quote.querySelector(".author"))

    tags = [
        await _text_of(page, tag_elem)
        for tag_elem in await quote.querySelectorAll(".tag")
    ]
    return {"text": text, "author": author, "tags": tags}


async def scrape_quotes():
    """Scrape pages 1-10 of ``https://quotes.toscrape.com/js/page/<n>/``.

    Launches a headless browser with pyppeteer, opens each page in turn,
    waits for the ``.quote`` elements to appear, and collects every quote's
    text, author and tags. A progress line is printed per page.

    Returns:
        list[dict]: one dict per quote, in page order, with keys ``"text"``
        (str, surrounding curly quotes removed), ``"author"`` (str) and
        ``"tags"`` (list of str).

    Raises:
        Exceptions from the pyppeteer calls are not caught here; the browser
        is always closed before they propagate.
    """
    browser = await launch(headless=True)
    # try/finally so the browser process is shut down even when navigation,
    # the selector wait, or an evaluate call fails part-way through.
    try:
        page = await browser.newPage()
        await page.setViewport({"width": 1920, "height": 1080})

        base_url = "https://quotes.toscrape.com/js/page/{}/"
        all_quotes = []

        for page_num in range(1, 11):
            await page.goto(base_url.format(page_num))
            # NOTE(review): the /js/ pages presumably render quotes client-side,
            # hence the explicit wait before querying - confirm against the site.
            await page.waitForSelector(".quote")

            quotes = await page.querySelectorAll(".quote")
            for quote in quotes:
                all_quotes.append(await _extract_quote(page, quote))

            print(f"Page {page_num}: scraped {len(quotes)} quotes")

        return all_quotes
    finally:
        await browser.close()
def _print_preview(quotes, start):
    """Print a two-line summary of each quote in *quotes*, numbered from *start*."""
    for i, q in enumerate(quotes, start):
        # Only the first 60 characters of the text are shown.
        print(f'{i}. "{q["text"][:60]}..."')
        print(f' Author: {q["author"]} | Tags: {", ".join(q["tags"])}')


# Run the async function
all_quotes = asyncio.run(scrape_quotes())

# Encoding is pinned so the file does not depend on the platform default;
# json.dump escapes non-ASCII characters by default, so the bytes are unchanged.
with open("quotes_pyppeteer.json", "w", encoding="utf-8") as f:
    json.dump(all_quotes, f, indent=2)
print(f"\nSaved {len(all_quotes)} quotes to quotes_pyppeteer.json")

print("\nFirst 3 quotes:")
_print_preview(all_quotes[:3], 1)

# Clamp to 1 so the numbering stays correct when fewer than three quotes
# were scraped (len - 2 would otherwise be 0 or negative).
first_of_last = max(1, len(all_quotes) - 2)
print(f"\nLast 3 quotes (#{first_of_last} - #{len(all_quotes)}):")
_print_preview(all_quotes[-3:], first_of_last)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment