Created
December 16, 2025 00:05
-
-
Save bryanseah234/891af97efa72cf7fb621cb01598a7c91 to your computer and use it in GitHub Desktop.
youtube-channels-code (code to sort your subscriptions by subscriber count)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import re | |
| # Usage: in the YouTube page source, find the <script nonce="..."> element that defines ytInitialData (it contains the channel IDs), then copy the whole element and save it as "youtube js.txt". | |
def parse_subscriber_count(count_text):
    """
    Parse an abbreviated count such as '1.05M subscribers' into an integer.

    The first number found in the text is used. It may contain thousands
    separators ('1,234') and may be followed by a K, M or B suffix
    (case-insensitive, optional whitespace before it).

    Args:
        count_text: Display text, e.g. '45K subscribers'. May be empty/None.

    Returns:
        The count as an int, or 0 when the text is empty, contains no
        number, or the number cannot be parsed.
    """
    if not count_text:
        return 0

    # Number (optionally starting with '.', optionally with commas),
    # then an optional magnitude suffix.
    match = re.search(r'(\.?\d[\d.,]*)\s*([KMB]?)', count_text, re.IGNORECASE)
    if not match:
        return 0

    # Commas are treated as thousands separators and dropped.
    number_str = match.group(1).replace(',', '')
    suffix = match.group(2).upper()

    try:
        value = float(number_str)
    except ValueError:
        # e.g. '1.2.3' - looks numeric to the regex but is not a number.
        return 0

    multipliers = {'': 1, 'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}

    # round() rather than int(): the float product can land a hair below
    # the intended whole number, and int() would truncate it downwards.
    return round(value * multipliers[suffix])
def extract_text_from_node(node):
    """
    Return the display text stored in one of YouTube's nested text objects.

    Two layouts are understood: a dict with a 'simpleText' entry, whose
    value is returned as-is, and a dict with a 'runs' list, whose items'
    'text' values are concatenated. 'simpleText' wins when both are present.
    Anything else (including non-dict input) yields an empty string.
    """
    if isinstance(node, dict):
        if 'simpleText' in node:
            return node['simpleText']
        if 'runs' in node:
            pieces = (run.get('text', '') for run in node['runs'])
            return "".join(pieces)
    return ""
def extract_channels_recursive(data, channels_list):
    """
    Walk a parsed JSON structure and collect every channel entry found.

    A dict counts as a channel entry when it has a 'channelId' key and a
    usable subscriber-count text. For each one, a dict with the keys
    'channelName', 'channelId', 'subscriberCount' and 'subscriberCountText'
    is appended to ``channels_list``.

    Args:
        data: Any JSON value (dict, list or scalar). Scalars are ignored.
        channels_list: List that is extended in place with the results.

    Returns:
        None. Results are delivered through ``channels_list``.
    """
    if isinstance(data, dict):
        if 'channelId' in data:
            count_text = ""

            # Preferred source: 'videoCountText', but only when its text
            # actually mentions subscribers.
            video_text = extract_text_from_node(data.get('videoCountText'))
            if "subscriber" in video_text.lower():
                count_text = video_text

            # Fallback: 'subscriberCountText'. It must contain a digit, and
            # it must not be a channel handle: a handle such as '@user123'
            # contains digits too and would otherwise be recorded as a
            # count of 123.
            if not count_text:
                sub_text = extract_text_from_node(data.get('subscriberCountText'))
                looks_like_handle = sub_text.lstrip().startswith('@')
                if not looks_like_handle and any(ch.isdigit() for ch in sub_text):
                    count_text = sub_text

            # Only entries with a usable subscriber count are recorded.
            if count_text:
                channels_list.append({
                    'channelName': extract_text_from_node(data.get('title')),
                    'channelId': data['channelId'],
                    'subscriberCount': parse_subscriber_count(count_text),
                    'subscriberCountText': count_text,
                })

        # Keep descending: channel entries can be nested anywhere.
        for value in data.values():
            extract_channels_recursive(value, channels_list)

    elif isinstance(data, list):
        for item in data:
            extract_channels_recursive(item, channels_list)
def main():
    """
    Read 'youtube js.txt', extract the channels and write them, sorted by
    subscriber count (smallest first), to 'subscriptions.json'.

    The input is expected to contain a ``var ytInitialData = {...}``
    assignment. Channels are de-duplicated by channel ID; when an ID occurs
    more than once the last occurrence is kept. Any error is reported on
    stdout instead of being raised.
    """
    input_filename = 'youtube js.txt'
    output_filename = 'subscriptions.json'

    try:
        with open(input_filename, 'r', encoding='utf-8') as f:
            content = f.read()

        # Find where the JSON object starts. The lookahead keeps the
        # opening brace out of the match so parsing can begin exactly there.
        marker = re.search(r'var ytInitialData\s*=\s*(?=\{)', content)
        if not marker:
            print(f"Error: Could not find 'ytInitialData' in {input_filename}")
            return

        # Let the JSON parser find the end of the object. Cutting at the
        # first '};' with a regex breaks whenever that sequence appears
        # inside a string value.
        data, _ = json.JSONDecoder().raw_decode(content, marker.end())

        # Extract
        extracted_channels = []
        extract_channels_recursive(data, extracted_channels)

        # Deduplicate (using channelId as the unique key)
        unique_channels = {ch['channelId']: ch for ch in extracted_channels}.values()

        # Sort by subscriber count (small to large)
        sorted_channels = sorted(unique_channels, key=lambda x: x['subscriberCount'])

        # Export
        with open(output_filename, 'w', encoding='utf-8') as f:
            json.dump(sorted_channels, f, indent=4)

        print(f"Success! Extracted {len(sorted_channels)} channels to '{output_filename}'.")

    except Exception as e:
        # Top-level boundary of a command-line script: report, don't crash.
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment