Created
December 11, 2025 10:07
-
-
Save BexTuychiev/8c0518dab8f3c7f02d8be7f855cf54a6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from selenium import webdriver | |
| from selenium.webdriver.chrome.service import Service | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| import json | |
# Configure headless Chrome
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--window-size=1920,1080")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

base_url = "https://quotes.toscrape.com/js/page/{}/"
all_quotes = []

# Scrape pages 1-10. The try/finally guarantees the browser is shut down even
# when a page load, wait timeout, or element lookup raises, so a failed run
# does not leave an orphaned Chrome/chromedriver process behind.
try:
    for page_num in range(1, 11):
        url = base_url.format(page_num)
        driver.get(url)

        # Block (up to 10 s) until at least one ".quote" element is present
        # in the DOM before reading the page.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".quote"))
        )

        quotes = driver.find_elements(By.CSS_SELECTOR, ".quote")
        for quote in quotes:
            text = quote.find_element(By.CSS_SELECTOR, ".text").text
            # Remove surrounding quotation marks
            text = text.strip("\u201c\u201d")
            author = quote.find_element(By.CSS_SELECTOR, ".author").text
            tags = [tag.text for tag in quote.find_elements(By.CSS_SELECTOR, ".tag")]
            all_quotes.append({
                "text": text,
                "author": author,
                "tags": tags,
            })

        print(f"Page {page_num}: scraped {len(quotes)} quotes")
finally:
    driver.quit()
# Persist every scraped quote as pretty-printed (2-space indented) JSON.
with open("quotes_selenium.json", "w") as out_file:
    serialized = json.dumps(all_quotes, indent=2)
    out_file.write(serialized)

print(f"\nSaved {len(all_quotes)} quotes to quotes_selenium.json")
def _print_preview(quotes, start):
    """Print a two-line summary for each quote, numbering from *start*.

    Each quote is a dict with "text", "author" and "tags" keys. The text is
    cut to 60 characters for display.
    """
    for number, quote in enumerate(quotes, start):
        text = quote["text"]
        # Only show an ellipsis when the text really was truncated.
        suffix = "..." if len(text) > 60 else ""
        print(f'{number}. "{text[:60]}{suffix}"')
        print(f' Author: {quote["author"]} | Tags: {", ".join(quote["tags"])}')


# Quick sanity check of the scrape: show the first and last three quotes.
print("\nFirst 3 quotes:")
_print_preview(all_quotes[:3], 1)

# Clamp the starting number to 1 so the header and numbering stay correct
# when fewer than three quotes were collected.
last_start = max(len(all_quotes) - 2, 1)
if all_quotes:
    print(f"\nLast 3 quotes (#{last_start} - #{len(all_quotes)}):")
    _print_preview(all_quotes[-3:], last_start)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment