bryanseah234 · December 16, 2025 00:05
diff --git a/sortchannels.py b/sortchannels.py
 import json
 import re

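 # Usage: in the YouTube page source, find the <script nonce="..."> element that contains the channel IDs (it assigns "var ytInitialData"), then copy the whole element and save it as "youtube js.txt" in the working directory.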

 def parse_subscriber_count(count_text):
     """
     Convert a human-readable subscriber count into an integer.

     Handles strings like '1.05M subscribers', '45K subscribers',
     '1,234 subscribers' or '987 subscribers'.

     Args:
         count_text: Text containing a number, optionally followed by a
             standalone K (thousand), M (million) or B (billion) suffix.
             Commas inside the number are treated as thousands separators.

     Returns:
         The count as an int, or 0 when the text is empty, is not a string,
         or contains no parsable number.
     """
     if not count_text or not isinstance(count_text, str):
         return 0

     # The suffix only counts when it is a standalone letter: the negative
     # lookahead keeps the 'm' of e.g. '500 members' from being read as
     # 'million'.
     match = re.search(
         r'(\d[\d,]*(?:\.\d+)?|\.\d+)\s*(?:([KMB])(?![A-Za-z]))?',
         count_text,
         re.IGNORECASE,
     )
     if not match:
         return 0

     number_str = match.group(1).replace(',', '')
     suffix = (match.group(2) or '').upper()
     scale = {'': 1, 'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}[suffix]

     try:
         # round() rather than int(): a float product such as 1.005 * 1000
         # lands just below the intended integer and int() would drop one.
         return round(float(number_str) * scale)
     except (ValueError, OverflowError):
         return 0

 def extract_text_from_node(node):
     """
     Return the display text held by one of YouTube's nested text objects.

     A dict with a 'simpleText' key yields that value as-is. Otherwise a
     dict with a 'runs' key yields the concatenation of every run's 'text'
     (runs without one contribute nothing). Anything else yields "".
     """
     if isinstance(node, dict):
         if 'simpleText' in node:
             return node['simpleText']
         if 'runs' in node:
             pieces = [run.get('text', '') for run in node['runs']]
             return "".join(pieces)
     return ""

 def extract_channels_recursive(data, channels_list):
     """
     Walk a parsed JSON tree and collect the channel entries found in it.

     Every dict that has a 'channelId' key and a usable subscriber-count
     text gets a summary dict appended to channels_list, which is mutated
     in place. Nothing is returned. Values that are neither dicts nor lists
     are ignored.
     """
     if isinstance(data, list):
         for element in data:
             extract_channels_recursive(element, channels_list)
         return

     if not isinstance(data, dict):
         return

     if 'channelId' in data:
         subs_text = ""

         # Preferred source: 'videoCountText', accepted only when its text
         # actually mentions subscribers.
         if 'videoCountText' in data:
             candidate = extract_text_from_node(data['videoCountText'])
             if "subscriber" in candidate.lower():
                 subs_text = candidate

         # Fallback source: 'subscriberCountText', accepted only when it
         # contains at least one digit.
         if not subs_text and 'subscriberCountText' in data:
             candidate = extract_text_from_node(data['subscriberCountText'])
             if any(ch.isdigit() for ch in candidate):
                 subs_text = candidate

         # Entries without a subscriber-count text are not recorded.
         if subs_text:
             title = extract_text_from_node(data['title']) if 'title' in data else ""
             entry = {
                 'channelName': title,
                 'channelId': data['channelId'],
                 'subscriberCount': parse_subscriber_count(subs_text),
                 'subscriberCountText': subs_text,
             }
             channels_list.append(entry)

     # Descend into every value, whether or not this dict was a channel.
     for child in data.values():
         extract_channels_recursive(child, channels_list)

 def main():
     """
     Extract the channels from a saved YouTube script element and export them.

     Reads 'youtube js.txt', parses the JSON object assigned to
     'var ytInitialData', collects the channel entries, de-duplicates them by
     channelId, sorts them by subscriber count (ascending) and writes the
     result to 'subscriptions.json'. Both paths are relative to the current
     working directory. Failures are reported on stdout instead of raised.
     """
     input_filename = 'youtube js.txt'
     output_filename = 'subscriptions.json'

     try:
         with open(input_filename, 'r', encoding='utf-8') as f:
             content = f.read()

         # Locate the start of the object literal assigned to ytInitialData.
         match = re.search(r'var ytInitialData\s*=\s*(?=\{)', content)
         if not match:
             print(f"Error: Could not find 'ytInitialData' in {input_filename}")
             return

         # Let the JSON parser decide where the object ends. A non-greedy
         # regex up to the first '};' cuts the object short whenever that
         # sequence occurs inside a string value.
         data, _ = json.JSONDecoder().raw_decode(content[match.end():])

         # Extract
         extracted_channels = []
         extract_channels_recursive(data, extracted_channels)

         # Deduplicate by channelId; the last occurrence of an ID wins.
         unique_channels = {ch['channelId']: ch for ch in extracted_channels}.values()

         # Sort by subscriber count (small to large)
         sorted_channels = sorted(unique_channels, key=lambda x: x['subscriberCount'])

         # Export. The file is UTF-8, so non-ASCII channel names are written
         # as-is instead of as \uXXXX escapes.
         with open(output_filename, 'w', encoding='utf-8') as f:
             json.dump(sorted_channels, f, indent=4, ensure_ascii=False)

         print(f"Success! Extracted {len(sorted_channels)} channels to '{output_filename}'.")

     except Exception as e:
         # Top-level boundary of the script: report the problem and stop.
         print(f"An error occurred: {e}")

 # Run the extraction only when this file is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()
	import json
	import re

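	# Usage: in the YouTube page source, find the <script nonce="..."> element that contains the channel IDs (it assigns "var ytInitialData"), then copy the whole element and save it as "youtube js.txt" in the working directory.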

	def parse_subscriber_count(count_text):
	    """
	    Convert a human-readable subscriber count into an integer.

	    Handles strings like '1.05M subscribers', '45K subscribers',
	    '1,234 subscribers' or '987 subscribers'.

	    Args:
	        count_text: Text containing a number, optionally followed by a
	            standalone K (thousand), M (million) or B (billion) suffix.
	            Commas inside the number are treated as thousands separators.

	    Returns:
	        The count as an int, or 0 when the text is empty, is not a string,
	        or contains no parsable number.
	    """
	    if not count_text or not isinstance(count_text, str):
	        return 0

	    # The suffix only counts when it is a standalone letter: the negative
	    # lookahead keeps the 'm' of e.g. '500 members' from being read as
	    # 'million'.
	    match = re.search(
	        r'(\d[\d,]*(?:\.\d+)?|\.\d+)\s*(?:([KMB])(?![A-Za-z]))?',
	        count_text,
	        re.IGNORECASE,
	    )
	    if not match:
	        return 0

	    number_str = match.group(1).replace(',', '')
	    suffix = (match.group(2) or '').upper()
	    scale = {'': 1, 'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}[suffix]

	    try:
	        # round() rather than int(): a float product such as 1.005 * 1000
	        # lands just below the intended integer and int() would drop one.
	        return round(float(number_str) * scale)
	    except (ValueError, OverflowError):
	        return 0

	def extract_text_from_node(node):
	    """
	    Return the display text held by one of YouTube's nested text objects.

	    A dict with a 'simpleText' key yields that value as-is. Otherwise a
	    dict with a 'runs' key yields the concatenation of every run's 'text'
	    (runs without one contribute nothing). Anything else yields "".
	    """
	    if not isinstance(node, dict):
	        return ""

	    if 'simpleText' in node:
	        return node['simpleText']
	    elif 'runs' in node:
	        return "".join([r.get('text', '') for r in node['runs']])
	    return ""

	def extract_channels_recursive(data, channels_list):
	    """
	    Recursively search parsed JSON for channel entries.

	    Every dict that has a 'channelId' key and a usable subscriber-count
	    text gets a summary dict appended to channels_list, which is mutated
	    in place. Nothing is returned. Values that are neither dicts nor lists
	    are ignored.
	    """
	    if isinstance(data, dict):
	        # We look for an object that has a channelId
	        if 'channelId' in data:
	            count_text = ""

	            # Try 'videoCountText' first, accepted only when its text
	            # mentions subscribers, then fall back to 'subscriberCountText'.
	            if 'videoCountText' in data:
	                text = extract_text_from_node(data['videoCountText'])
	                if "subscriber" in text.lower():
	                    count_text = text

	            if not count_text and 'subscriberCountText' in data:
	                text = extract_text_from_node(data['subscriberCountText'])
	                # Only use if it looks like a count (contains digits)
	                if any(char.isdigit() for char in text):
	                    count_text = text

	            # If we found a valid subscriber count, extract the name and add to list
	            if count_text:
	                name = ""
	                if 'title' in data:
	                    name = extract_text_from_node(data['title'])

	                channels_list.append({
	                    'channelName': name,
	                    'channelId': data['channelId'],
	                    'subscriberCount': parse_subscriber_count(count_text),
	                    'subscriberCountText': count_text
	                })

	        # Continue recursion into every value, channel entry or not
	        for value in data.values():
	            extract_channels_recursive(value, channels_list)

	    elif isinstance(data, list):
	        for item in data:
	            extract_channels_recursive(item, channels_list)

	def main():
	    """
	    Extract the channels from a saved YouTube script element and export them.

	    Reads 'youtube js.txt', parses the JSON object assigned to
	    'var ytInitialData', collects the channel entries, de-duplicates them by
	    channelId, sorts them by subscriber count (ascending) and writes the
	    result to 'subscriptions.json'. Both paths are relative to the current
	    working directory. Failures are reported on stdout instead of raised.
	    """
	    input_filename = 'youtube js.txt'
	    output_filename = 'subscriptions.json'

	    try:
	        with open(input_filename, 'r', encoding='utf-8') as f:
	            content = f.read()

	        # Locate the start of the object literal assigned to ytInitialData.
	        # '\s*' accepts any amount of whitespace (including none) around '='.
	        match = re.search(r'var ytInitialData\s*=\s*(?=\{)', content)
	        if not match:
	            print(f"Error: Could not find 'ytInitialData' in {input_filename}")
	            return

	        # Let the JSON parser decide where the object ends. A non-greedy
	        # regex up to the first '};' cuts the object short whenever that
	        # sequence occurs inside a string value.
	        data, _ = json.JSONDecoder().raw_decode(content[match.end():])

	        # Extract
	        extracted_channels = []
	        extract_channels_recursive(data, extracted_channels)

	        # Deduplicate by channelId; the last occurrence of an ID wins.
	        unique_channels = {ch['channelId']: ch for ch in extracted_channels}.values()

	        # Sort by subscriber count (small to large)
	        sorted_channels = sorted(unique_channels, key=lambda x: x['subscriberCount'])

	        # Export. The file is UTF-8, so non-ASCII channel names are written
	        # as-is instead of as \uXXXX escapes.
	        with open(output_filename, 'w', encoding='utf-8') as f:
	            json.dump(sorted_channels, f, indent=4, ensure_ascii=False)

	        print(f"Success! Extracted {len(sorted_channels)} channels to '{output_filename}'.")

	    except Exception as e:
	        # Top-level boundary of the script: report the problem and stop.
	        print(f"An error occurred: {e}")

	# Run the extraction only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	    main()
No results found