Last active
February 12, 2026 01:25
-
-
Save luckylittle/58363570af7c432ad3e429bcd4b2155f to your computer and use it in GitHub Desktop.
AutoBrr List that contains "New on Netflix" from three genres (true crime, documentary films, documentary series)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/python | |
import json
from typing import FrozenSet, List, Set

import requests
from bs4 import BeautifulSoup
| TRUE_CRIME_URL = "https://www.netflix.com/browse/genre/108820" | |
| DOCUMENTARY_URL = "https://www.netflix.com/browse/genre/2243108" | |
| DOCUMENTARY_SERIES_URL = "https://www.netflix.com/browse/genre/10105" | |
| # Stable section title in the UI (not content-dependent). | |
| SECTION_TITLE = "New on Netflix" | |
| # Button/label text we must not treat as titles when scraping the row. | |
| _NON_TITLE_TEXT: Set[str] = frozenset( | |
| {"Join now", "Play", "More Info", "Add to My List", "Next", "Previous"} | |
| ) | |
def _titles_from_html(soup: BeautifulSoup) -> List[str]:
    """Extract titles from the 'New on Netflix' section using the section heading only.

    Every ``<h2>`` whose stripped text equals ``SECTION_TITLE`` is examined.
    For each such heading, two candidate rows are tried in order: the
    heading's next sibling, then the next sibling of the heading's parent.
    The first candidate that yields at least one title wins.

    Args:
        soup: Parsed HTML of a genre page.

    Returns:
        The stripped text of every ``<p>`` in the matching row, in document
        order, excluding empty strings and entries of ``_NON_TITLE_TEXT``.
        An empty list when no heading/row pair produces any title.
    """
    for heading in soup.find_all("h2"):
        if heading.get_text(strip=True) != SECTION_TITLE:
            continue

        # Row can be: next sibling of h2, or next sibling of h2's parent.
        # Both are tried, so a sibling that holds no titles (for example a
        # link next to the heading) does not hide the parent-level row.
        candidates = [heading.find_next_sibling()]
        parent = heading.find_parent()
        if parent is not None:
            candidates.append(parent.find_next_sibling())

        for row in candidates:
            if not row:
                continue
            # Strip each paragraph once, then filter on the stripped text.
            texts = (p.get_text(strip=True) for p in row.find_all("p"))
            titles = [text for text in texts if text and text not in _NON_TITLE_TEXT]
            if titles:
                return titles
    return []
def _titles_from_json_ld(soup: BeautifulSoup) -> List[str]:
    """Use JSON-LD only when an ItemList is explicitly named 'New on Netflix'.

    Scans every ``<script type="application/ld+json">`` tag. Scripts that are
    empty or contain invalid JSON are skipped. The payload may be a single
    object or an array of objects; anything that is not a JSON object is
    ignored instead of raising.

    Args:
        soup: Parsed HTML of a genre page.

    Returns:
        The ``item.name`` of each ``itemListElement`` entry of the first
        ItemList whose ``name`` equals ``SECTION_TITLE`` (possibly an empty
        list if that ItemList has no usable entries), or an empty list when
        no such ItemList exists.
    """
    for script in soup.find_all("script", type="application/ld+json"):
        try:
            data = json.loads(script.string)
        except (TypeError, json.JSONDecodeError):
            # TypeError: script.string is None; JSONDecodeError: malformed JSON.
            continue

        # A JSON-LD payload may be one object or an array of objects.
        documents = data if isinstance(data, list) else [data]
        for doc in documents:
            if not isinstance(doc, dict):
                continue
            if doc.get("@type") != "ItemList" or doc.get("name") != SECTION_TITLE:
                continue

            elements = doc.get("itemListElement") or []
            if not isinstance(elements, list):
                # A lone element may be serialized as an object, not an array.
                elements = [elements]
            return [
                element["item"]["name"]
                for element in elements
                if isinstance(element, dict)
                and isinstance(element.get("item"), dict)
                and element["item"].get("name")
            ]
    return []
def get_new_on_netflix_titles(html: str) -> List[str]:
    """Return the 'New on Netflix' titles found in a genre page's HTML.

    The page is parsed with the built-in ``html.parser``. Titles located via
    the section heading are preferred; when that yields nothing, the JSON-LD
    ItemList lookup is used as a fallback.

    Args:
        html: Raw HTML of a genre page.

    Returns:
        The list of titles, or an empty list when neither strategy finds any.
    """
    parsed = BeautifulSoup(html, "html.parser")
    # Short-circuit: the JSON-LD fallback only runs when the HTML scrape is empty.
    return _titles_from_html(parsed) or _titles_from_json_ld(parsed)
def main() -> None:
    """Fetch the three genre pages and print the combined title list.

    Each page is downloaded with a browser-like User-Agent, its
    'New on Netflix' titles are collected, and the lowercased titles are
    printed one per line, de-duplicated and sorted.

    Raises:
        requests.HTTPError: If a page responds with an error status
            (via ``raise_for_status``).
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) Chrome/141.0.7150.0"}
    all_titles: List[str] = []
    for url in (TRUE_CRIME_URL, DOCUMENTARY_URL, DOCUMENTARY_SERIES_URL):
        # requests has no default timeout; without one a stalled connection
        # would block the script indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        all_titles.extend(get_new_on_netflix_titles(response.text))

    # Lowercase before de-duplicating so titles that differ only in letter
    # case are printed once instead of as identical lines.
    for title in sorted({title.lower() for title in all_titles}):
        print(title)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| all the empty rooms | |
| bad boy billionaires: india | |
| ballkids | |
| bill bailey’s wild west australia | |
| breakdown: 1975 | |
| cover-up | |
| deeper | |
| depeche mode: m | |
| dining with the kapoors | |
| elway | |
| evil influencer: the jodi hildebrandt story | |
| first weapons | |
| glitter & gold: ice dancing | |
| kidnapped: elizabeth smart | |
| lali: time to step up | |
| mark rober's crunchlabs | |
| masaka kids, a rhythm within | |
| matter of time | |
| miracle: the boys of '80 | |
| missing: dead or alive? | |
| murder in monaco | |
| one last adventure: the making of stranger things 5 | |
| paparazzi king | |
| people’s republic of mallacoota | |
| queen of chess | |
| sangre del toro | |
| scotty james: pipe dream | |
| sean combs: the reckoning | |
| selena y los dinos: a family’s legacy | |
| simon cowell: the next act | |
| starto countdown 2025→2026 | |
| take that | |
| the carman family deaths | |
| the investigation of lucy letby | |
| the making of jay kelly | |
| the new yorker at 100 | |
| the stringer: the man who took the photo | |
| the whiteley art scandal | |
| timelesz project -real- | |
| unlocked: a jail experiment | |
| wwe: unreal |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to use this in AutoBrr?
AutoBrr lists settings page: https://<AUTOBRR_IP>:<AUTOBRR_PORT>/settings/lists
List URL: https://gist.githubusercontent.com/luckylittle/58363570af7c432ad3e429bcd4b2155f/raw/netflix.txt