@gglanzani
Created December 28, 2025 22:07
Script to automatically add tags to markdown blog posts (with YAML front matter) using OpenAI GPT-4 for content analysis.
#!/usr/bin/env python3
"""
Script to automatically add tags to blog posts using OpenAI GPT-4 for content analysis.
Analyzes the content and generates relevant tags for each post.
"""
import os
import json
import re
import sys
import yaml
import time
from pathlib import Path
from typing import List
from collections import Counter

try:
    from openai import OpenAI
except ImportError:
    print("Error: OpenAI library not found. Please install it with: pip install openai")
    sys.exit(1)
def extract_frontmatter_and_content(file_path: str) -> tuple:
    """Extract frontmatter and content from a markdown file."""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split frontmatter and content
    if content.startswith('---'):
        parts = content.split('---', 2)
        if len(parts) >= 3:
            frontmatter_str = parts[1]
            post_content = parts[2].strip()
            try:
                # safe_load returns None for empty front matter, so fall back to {}
                frontmatter = yaml.safe_load(frontmatter_str) or {}
            except yaml.YAMLError:
                frontmatter = {}
        else:
            frontmatter = {}
            post_content = content
    else:
        frontmatter = {}
        post_content = content

    return frontmatter, post_content
def generate_tags_with_openai(title: str, content: str, client: OpenAI) -> List[str]:
    """Use OpenAI GPT-4 to analyze content and generate relevant tags."""
    # Prepare the prompt for GPT-4
    prompt = f"""Analyze the following blog post and generate relevant tags. The blog is a technical blog covering topics like programming, AI/ML, productivity, technology reviews, and personal thoughts.

Please generate 3-7 relevant tags that best categorize this content. Focus on:
- Programming languages and technologies mentioned
- Main topics and themes
- Tools and platforms discussed
- Content type (tutorial, opinion, review, etc.)

Return only a JSON array of tags, nothing else. Use lowercase, short tags (1-2 words max). Avoid overly generic tags. If the content is short, don't generate more than 1-2 tags.

Title: {title}

Content: {content[:3000]}{"..." if len(content) > 3000 else ""}

Tags:"""

    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that analyzes blog post content and generates relevant tags. Always respond with valid JSON array format."
                },
                {"role": "user", "content": prompt}
            ],
            max_tokens=150,
            temperature=0.3
        )

        # Parse the response
        response_content = response.choices[0].message.content.strip()

        # Try to extract a JSON array from the response
        try:
            json_match = re.search(r'\[.*?\]', response_content, re.DOTALL)
            if json_match:
                tags_json = json_match.group()
                tags = json.loads(tags_json)
                # Ensure all tags are strings and clean them
                clean_tags = [str(tag).lower().strip() for tag in tags if tag]
                return clean_tags[:7]  # Limit to 7 tags max
            else:
                print(f"Warning: Could not find JSON array in response: {response_content}")
                return []
        except json.JSONDecodeError as e:
            print(f"Warning: Failed to parse JSON response: {response_content}")
            print(f"JSON Error: {e}")
            return []
    except Exception as e:
        print(f"Error calling OpenAI API: {e}")
        return []
def update_frontmatter_with_tags(file_path: str, tags: List[str], dry_run: bool = True) -> bool:
    """Insert a tags line into the front matter, directly after the title line."""
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Check if tags already exist
    for line in lines:
        if line.strip().startswith('tags:'):
            if not dry_run:
                print(" Tags already exist in file")
            return False

    # Find where to insert the tags line (directly after the title line)
    tags_line_index = None
    for i, line in enumerate(lines):
        if line.strip().startswith('title:'):
            tags_line_index = i + 1
            break

    if tags_line_index is None:
        if not dry_run:
            print(" Could not find title line to insert tags after")
        return False

    # Nothing to insert if no tags were suggested
    if not tags:
        return False

    # Format tags as a sorted JSON-style list
    sorted_tags = sorted(tags)
    tags_line = f'tags: {json.dumps(sorted_tags)}\n'

    # Insert the tags line
    if not dry_run:
        lines.insert(tags_line_index, tags_line)

        # Write back to file
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)

    return True
def main():
    """Main function to process all posts and add tags using OpenAI."""
    # Check for OpenAI API key
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        print("Error: OPENAI_API_KEY environment variable not set!")
        print("Please set your OpenAI API key with: export OPENAI_API_KEY='your-api-key'")
        return

    # Initialize OpenAI client
    client = OpenAI(api_key=api_key)

    posts_dir = Path("content/posts")
    if not posts_dir.exists():
        print("content/posts directory not found!")
        return

    print("Analyzing posts with OpenAI GPT-4 and suggesting tags...\n")

    # Get all posts without tags
    posts_to_process = []
    for post_file in sorted(posts_dir.glob("*.md")):
        # Check if tags already exist by reading the file directly
        with open(post_file, 'r', encoding='utf-8') as f:
            file_content = f.read()

        # Skip if tags already exist
        has_tags = False
        for line in file_content.split('\n'):
            if line.strip().startswith('tags:'):
                has_tags = True
                break
        if has_tags:
            continue

        # Extract frontmatter and content for processing
        frontmatter, content = extract_frontmatter_and_content(post_file)
        posts_to_process.append((post_file, frontmatter, content))

    if not posts_to_process:
        print("No posts found that need tags. All posts already have tags!")
        return

    print(f"Found {len(posts_to_process)} posts without tags.")

    # Ask for confirmation before making API calls
    response = input(f"This will make {len(posts_to_process)} OpenAI API calls. Continue? [y/N]: ")
    if response.lower().strip() not in ['y', 'yes']:
        print("Cancelled.")
        return

    # Process posts and collect suggestions
    all_suggested_tags = Counter()
    post_suggestions = []

    print("\nProcessing posts...")
    for i, (post_file, frontmatter, content) in enumerate(posts_to_process, 1):
        title = frontmatter.get('title', '')
        print(f"[{i}/{len(posts_to_process)}] Processing: {post_file.name}")

        # Generate tags using OpenAI
        suggested_tags = generate_tags_with_openai(title, content, client)
        if suggested_tags:
            post_suggestions.append((post_file, suggested_tags, title))
            all_suggested_tags.update(suggested_tags)
            print(f" Generated tags: {', '.join(suggested_tags)}")
        else:
            print(" No tags generated")

        # Add a small delay to be respectful to the API
        time.sleep(1)

    # Show summary
    print(f"\n{'-'*80}")
    print("Most common suggested tags:")
    for tag, count in all_suggested_tags.most_common(20):
        print(f" {tag}: {count} posts")

    print(f"\nSuggested tags for {len(post_suggestions)} posts:")
    print("-" * 80)

    # Show suggestions for each post
    for post_file, tags, title in post_suggestions:
        print(f"\n📝 {post_file.name}")
        print(f" Title: {title}")
        print(f" Suggested tags: {', '.join(tags)}")

    print(f"\n{'-'*80}")
    response = input(f"\nWould you like to apply these tags to {len(post_suggestions)} posts? [y/N]: ")
    if response.lower().strip() in ['y', 'yes']:
        print("\nApplying tags...")
        updated_count = 0
        for post_file, tags, title in post_suggestions:
            if update_frontmatter_with_tags(post_file, tags, dry_run=False):
                print(f"✓ Updated {post_file.name}")
                updated_count += 1
            else:
                print(f"- Skipped {post_file.name}")
        print(f"\n✅ Updated {updated_count} posts with tags!")
    else:
        print("\nNo changes made. You can run this script again to apply the suggestions.")


if __name__ == "__main__":
    main()
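
For reference, a minimal sketch of what the script expects and produces; the file name, date field, and tag values below are invented for illustration. A post such as content/posts/example.md with front matter like

---
date: 2024-01-15
title: "Why I still use RSS"
---

has no tags: line, so it is picked up for processing. If you accept the suggestions, the script inserts a sorted JSON-style tags line directly after the title:

---
date: 2024-01-15
title: "Why I still use RSS"
tags: ["opinion", "rss"]
---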