Skip to content

Instantly share code, notes, and snippets.

@yigitkonur
Last active December 8, 2025 22:10
Show Gist options
  • Select an option

  • Save yigitkonur/c0bb4fbfed33174eb45430e836d6ecad to your computer and use it in GitHub Desktop.

Select an option

Save yigitkonur/c0bb4fbfed33174eb45430e836d6ecad to your computer and use it in GitHub Desktop.
Raycast script: LinkedIn Profile to Markdown via Clado API - fetches LinkedIn profiles, generates structured Markdown with experience, education, skills. Supports multiple URLs in parallel. Uses MD cache.
#!/usr/bin/env python3
# Required parameters:
# @raycast.schemaVersion 1
# @raycast.title LinkedIn Profile to Markdown
# @raycast.mode silent
# Optional parameters:
# @raycast.icon 💼
# @raycast.packageName AI Tools
# @raycast.argument1 { "type": "text", "placeholder": "LinkedIn URLs (comma or newline separated)" }
# @raycast.needsConfirmation false
# Documentation:
# @raycast.author Yigit Konur
# @raycast.authorURL https://github.com/yigitkonur
# @raycast.description Fetches LinkedIn profiles via Clado API, generates Markdown files. Supports multiple URLs in parallel. Uses MD cache.
import sys
import os
import re
import json
import subprocess
import requests
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Optional, Tuple, Any
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
# Load environment variables from .env file
SCRIPT_DIR = Path(__file__).parent
ENV_FILE = SCRIPT_DIR / ".env"
MD_OUTPUT_DIR = SCRIPT_DIR / "md" / "linkedin"
def load_env() -> None:
    """Load KEY=VALUE pairs from the sibling .env file into os.environ.

    Lines that are blank, comments (#), or lack an '=' are skipped.
    Existing environment variables are never overwritten. Values wrapped
    in matching single or double quotes (a common .env convention) have
    the quotes stripped.
    """
    if not ENV_FILE.exists():
        return
    with open(ENV_FILE) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()
            # Strip matching surrounding quotes, e.g. KEY="a b" or KEY='a b'
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            if key and key not in os.environ:
                os.environ[key] = value


load_env()
# Constants
CLADO_API_URL = "https://search.clado.ai/api/enrich/scrape"  # Clado profile-scrape endpoint
CLADO_API_KEY = os.environ.get("CLADO_API_KEY", "")  # populated from .env by load_env() above
SOUND_SUCCESS = "/System/Library/Sounds/Glass.aiff"  # macOS built-in notification sounds
SOUND_ERROR = "/System/Library/Sounds/Basso.aiff"
MAX_PARALLEL = 3  # max concurrent profile fetches in main()
# Thread-safe print lock
# NOTE(review): print_lock is never used anywhere in this script — candidate for removal.
print_lock = threading.Lock()
def play_sound(success: bool = True) -> None:
    """Play a macOS notification sound via afplay (best effort, errors ignored)."""
    chosen = SOUND_SUCCESS if success else SOUND_ERROR
    try:
        subprocess.run(["afplay", chosen], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Sound is purely cosmetic feedback; never let it break the script.
        pass
def copy_to_clipboard(text: str) -> bool:
    """Copy *text* to the macOS clipboard using pbcopy; return True on success."""
    try:
        subprocess.run(['pbcopy'], input=text.encode('utf-8'), check=True)
    except subprocess.CalledProcessError:
        return False
    return True
@dataclass
class ProfileResult:
    """Holds all results for a single profile processing."""
    url: str                              # requested LinkedIn profile URL
    success: bool = False                 # True once Markdown is available (fetch or cache)
    error: Optional[str] = None           # human-readable failure reason, None on success
    profile_data: Optional[Dict] = None   # raw API profile dict (None on a cache hit)
    md_path: Optional[Path] = None        # path of the saved / cached Markdown file
    md_content: str = ""                  # rendered Markdown content
    name: str = ""                        # profile display name ("First Last")
def extract_linkedin_username(url: str) -> Optional[str]:
    """Return the username segment of a linkedin.com/in/<user> URL, or None."""
    found = re.search(r'linkedin\.com/in/([^/?\s]+)', url)
    if found is None:
        return None
    return found.group(1)
def find_cached_result(username: str) -> Optional[Tuple[Path, str]]:
    """Return (path, content) of the newest cached MD file for *username*.

    Cache files are named ``<name>_<username>_<timestamp>.md``. ``glob``
    yields files in filesystem-dependent order, so candidates are sorted
    by modification time (newest first) to prefer the most recent
    snapshot. Unreadable files are skipped; returns None when no readable
    cache entry exists.
    """
    if not MD_OUTPUT_DIR.exists():
        return None
    candidates = sorted(
        MD_OUTPUT_DIR.glob(f"*_{username}_*.md"),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    for md_file in candidates:
        try:
            return (md_file, md_file.read_text(encoding='utf-8'))
        except Exception:
            # Corrupt/unreadable entry: fall through to the next-newest file.
            continue
    return None
def sanitize_filename(name: str) -> str:
    """Make *name* filesystem-safe: drop reserved chars, collapse whitespace to '_', cap at 80."""
    cleaned = re.sub(r'[<>:"/\\|?*]', '', name)
    return re.sub(r'\s+', '_', cleaned)[:80]
def parse_urls(input_text: str) -> List[str]:
    """Parse LinkedIn profile URLs from comma- or newline-separated input.

    Fragments that are not linkedin.com/in/ URLs are dropped; a missing
    scheme gets 'https://' prepended. Duplicates are removed (first
    occurrence wins) so the same profile is not fetched more than once.
    """
    valid_urls: List[str] = []
    seen = set()
    for raw in re.split(r'[,\n]+', input_text):
        url = raw.strip()
        if not url or 'linkedin.com/in/' not in url:
            continue
        if not url.startswith('http'):
            url = 'https://' + url
        if url not in seen:
            seen.add(url)
            valid_urls.append(url)
    return valid_urls
def fetch_profile(url: str) -> Dict[str, Any]:
    """Fetch a LinkedIn profile from the Clado API and return the profile dict.

    The API response nests the payload as
    ``{data: {connection: N, data: {...profile...}, posts: [...], follower: N}}``;
    posts and network counts live beside the profile dict, so they are
    merged into it before returning. Raises for HTTP errors and timeouts.
    """
    response = requests.get(
        CLADO_API_URL,
        headers={"Authorization": f"Bearer {CLADO_API_KEY}"},
        params={"linkedin_url": url},  # API uses snake_case param: linkedin_url
        timeout=60,
    )
    response.raise_for_status()
    result = response.json()
    if 'data' not in result:
        return result
    wrapper = result['data']
    profile_data = wrapper.get('data', {})
    # Posts and counts are siblings of the profile dict; fold them in.
    profile_data['posts'] = wrapper.get('posts', [])
    profile_data['follower_count'] = wrapper.get('follower', 0)
    profile_data['connection_count'] = wrapper.get('connection', 0)
    return profile_data
def format_date(date_str: Optional[str], year: Optional[int] = None, month: Optional[int] = None) -> str:
    """Format a date, preferring year (and zero-padded month) over a raw string."""
    if year and month:
        return f"{year}-{month:02d}"
    if year:
        return str(year)
    return date_str or ""
def format_experience(experiences: List[Dict]) -> str:
    """Render the experience list as a Markdown section (empty string if none)."""
    if not experiences:
        return ""
    out = ["## 💼 Experience", ""]
    for exp in experiences:
        title = exp.get('title', 'Position')
        company = exp.get('companyName', exp.get('company_name', exp.get('company', 'Company')))
        # Dates arrive either as nested {year, ...} objects or flat *_year keys.
        start_obj = exp.get('start', {})
        end_obj = exp.get('end', {})
        start_year = start_obj.get('year') if isinstance(start_obj, dict) else exp.get('start_year')
        end_year = end_obj.get('year') if isinstance(end_obj, dict) else exp.get('end_year')
        is_current = not end_year or exp.get('is_current', False)
        date_range = ""
        if start_year:
            if is_current:
                end_str = "Present"
            elif end_year:
                end_str = str(end_year)
            else:
                end_str = ""
            date_range = f" ({start_year} - {end_str})" if end_str else f" ({start_year})"
        out.append(f"### {title} @ {company}{date_range}")
        # Optional company metadata on a single " | "-joined line.
        detail_bits = []
        industry = exp.get('companyIndustry', exp.get('company_industry'))
        if industry:
            detail_bits.append(f"**Industry:** {industry}")
        size = exp.get('companyStaffCountRange', exp.get('company_size'))
        if size:
            detail_bits.append(f"**Size:** {size}")
        loc = exp.get('location')
        if loc:
            detail_bits.append(f"**Location:** {loc}")
        if detail_bits:
            out.append(" | ".join(detail_bits))
        desc = exp.get('description', '')
        if desc:
            out.append("")
            out.append(desc.strip())
        out.append("")
    return "\n".join(out)
def format_education(education: List[Dict]) -> str:
    """Render the education list as a Markdown section (empty string if none)."""
    if not education:
        return ""
    out = ["## 🎓 Education", ""]
    for edu in education:
        school = edu.get('schoolName', edu.get('school_name', edu.get('school', 'Institution')))
        degree = edu.get('degree', edu.get('degree_name', ''))
        field = edu.get('fieldOfStudy', edu.get('field_of_study', edu.get('field', '')))
        # Header reads "School — Degree in Field", omitting missing parts.
        parts = [school]
        if degree or field:
            if degree and field:
                qualification = f"{degree} in {field}"
            else:
                qualification = degree or field
            parts.append(qualification)
        out.append(f"### {' — '.join(filter(None, parts))}")
        # Dates arrive either as nested {year, ...} objects or flat *_year keys.
        start_obj = edu.get('start', {})
        end_obj = edu.get('end', {})
        start_year = start_obj.get('year') if isinstance(start_obj, dict) else edu.get('start_year')
        end_year = end_obj.get('year') if isinstance(end_obj, dict) else edu.get('end_year')
        if start_year or end_year:
            out.append(f"*{start_year or '?'} - {end_year or '?'}*")
        desc = edu.get('description', '')
        if desc:
            out.append("")
            out.append(desc.strip())
        out.append("")
    return "\n".join(out)
def format_skills(profile: Dict) -> str:
    """Render explicit (max 25) and inferred (max 15) skills as Markdown."""
    skills = profile.get('skills', [])
    inferred = profile.get('inferred_skills', profile.get('inferredSkills', []))
    if not skills and not inferred:
        return ""
    out = ["## 🛠️ Skills", ""]
    if skills:
        # Skills may be plain strings or dicts with 'name'/'endorsementsCount'.
        names = []
        for entry in skills[:25]:
            if not isinstance(entry, dict):
                names.append(str(entry))
                continue
            label = entry.get('name', '')
            if not label:
                continue
            count = entry.get('endorsementsCount', 0)
            names.append(f"{label} ({count})" if count else label)
        out.append(", ".join(names))
    if inferred:
        inferred_names = [s.get('name', s) if isinstance(s, dict) else s for s in inferred[:15]]
        out.append("")
        out.append("**Inferred:** " + ", ".join(inferred_names))
    out.append("")
    return "\n".join(out)
def format_certifications(certifications: List[Dict]) -> str:
    """Render certifications as a Markdown bullet list with optional links."""
    if not certifications:
        return ""
    out = ["## 📜 Certifications", ""]
    for cert in certifications:
        title = cert.get('name', cert.get('title', 'Certification'))
        issuer = cert.get('organization', cert.get('issuer', cert.get('authority', '')))
        cred_id = cert.get('credentialId', cert.get('credential_id', ''))
        link = cert.get('url', cert.get('credentialUrl', ''))
        entry = f"- **{title}**"
        if issuer:
            entry += f" — {issuer}"
        if cred_id:
            entry += f" (ID: {cred_id})"
        out.append(entry)
        if link:
            out.append(f" 🔗 [{link}]({link})")
    out.append("")
    return "\n".join(out)
def format_languages(languages: List[Dict]) -> str:
    """Render languages (with optional proficiency) as a Markdown bullet list."""
    if not languages:
        return ""
    out = ["## 🌐 Languages", ""]
    for entry in languages:
        if isinstance(entry, dict):
            label = entry.get('name', entry.get('language', ''))
            proficiency = entry.get('proficiency', '')
        else:
            label, proficiency = str(entry), ''
        if not label:
            continue
        bullet = f"- **{label}**"
        if proficiency:
            # e.g. "NATIVE_OR_BILINGUAL" -> "Native Or Bilingual"
            bullet += f" — {proficiency.replace('_', ' ').title()}"
        out.append(bullet)
    out.append("")
    return "\n".join(out)
def format_projects(projects: List[Dict]) -> str:
    """Render projects with optional date range, description and link."""
    if not projects:
        return ""
    out = ["## 🚀 Projects", ""]
    for proj in projects:
        title = proj.get('name', proj.get('title', 'Project'))
        # Dates arrive either as nested {year, ...} objects or flat date_*_year keys.
        start_obj = proj.get('start', {})
        end_obj = proj.get('end', {})
        start_year = start_obj.get('year') if isinstance(start_obj, dict) else proj.get('date_from_year')
        end_year = end_obj.get('year') if isinstance(end_obj, dict) else proj.get('date_to_year')
        suffix = ""
        if start_year:
            suffix = f" ({start_year} - {end_year})" if end_year else f" ({start_year} - Present)"
        out.append(f"### {title}{suffix}")
        desc = proj.get('description', '')
        if desc:
            out.append("")
            out.append(desc.strip())
        link = proj.get('url', '')
        if link:
            out.append(f"🔗 [{link}]({link})")
        out.append("")
    return "\n".join(out)
def format_volunteer(volunteer: List[Dict]) -> str:
    """Render volunteer experience entries as a Markdown section."""
    if not volunteer:
        return ""
    out = ["## 🤝 Volunteer Experience", ""]
    for entry in volunteer:
        role = entry.get('role', entry.get('title', 'Volunteer'))
        org = entry.get('organization', entry.get('companyName', ''))
        # Dates arrive either as nested {year, ...} objects or flat date_*_year keys.
        start_obj = entry.get('start', {})
        end_obj = entry.get('end', {})
        start_year = start_obj.get('year') if isinstance(start_obj, dict) else entry.get('date_from_year')
        end_year = end_obj.get('year') if isinstance(end_obj, dict) else entry.get('date_to_year')
        header = f"### {role}"
        if org:
            header = f"{header} @ {org}"
        if start_year:
            header = f"{header} ({start_year} - {end_year or 'Present'})"
        out.append(header)
        cause = entry.get('cause', '')
        if cause:
            out.append(f"*Cause: {cause}*")
        desc = entry.get('description', '')
        if desc:
            out.append("")
            out.append(desc.strip())
        out.append("")
    return "\n".join(out)
def format_publications(publications: List[Dict]) -> str:
    """Render publications as Markdown bullets; descriptions capped at 200 chars."""
    if not publications:
        return ""
    out = ["## 📚 Publications", ""]
    for pub in publications:
        title = pub.get('name', pub.get('title', 'Publication'))
        publisher = pub.get('publisher', '')
        entry = f"- **{title}**" + (f" — {publisher}" if publisher else "")
        out.append(entry)
        desc = pub.get('description', '')
        if desc:
            ellipsis = '...' if len(desc) > 200 else ''
            out.append(f" {desc[:200]}{ellipsis}")
        link = pub.get('url', '')
        if link:
            out.append(f" 🔗 [{link}]({link})")
    out.append("")
    return "\n".join(out)
def format_posts(posts: List[Dict]) -> str:
    """Format ALL posts with full engagement metrics - NO TRUNCATION.

    Each post becomes a "### Post N" section with its full text, a reaction
    breakdown, the first image/video (if any) and a link back to LinkedIn.
    Posts without any text are skipped entirely. Returns "" for an empty list.
    """
    if not posts:
        return ""
    lines = [f"## 📝 Posts ({len(posts)} total)", ""]
    for i, post in enumerate(posts, 1):  # Show ALL posts - no limit
        # Post text
        text = post.get('text', post.get('content', ''))
        if not text:
            continue
        # Date
        posted_at = post.get('postedAt', post.get('posted_at', post.get('postedDate', '')))
        is_reshare = post.get('reposted', False)
        # Header
        header = f"### Post {i}"
        if is_reshare:
            header += " (Reshare)"
        if posted_at:
            header += f" — {posted_at}"
        lines.append(header)
        lines.append("")
        # Full post text (preserve formatting)
        lines.append(text.strip())
        lines.append("")
        # Engagement metrics - detailed breakdown. Key names vary across API
        # versions, hence the camelCase/snake_case fallback chains.
        total_reactions = post.get('totalReactionCount', 0)
        likes = post.get('likeCount', post.get('likes_count', post.get('likes', 0)))
        praise = post.get('praiseCount', 0)
        empathy = post.get('empathyCount', 0)
        appreciation = post.get('appreciationCount', 0)
        interest = post.get('InterestCount', post.get('interestCount', 0))
        comments = post.get('commentsCount', post.get('comments_count', post.get('comments', 0)))
        reposts = post.get('repostsCount', post.get('reposts_count', 0))
        engagement_parts = []
        if total_reactions or likes:
            reaction_str = f"👍 {total_reactions or likes}"
            # Add breakdown if available
            breakdown = []
            if likes and total_reactions: breakdown.append(f"like: {likes}")
            if praise: breakdown.append(f"praise: {praise}")
            if empathy: breakdown.append(f"empathy: {empathy}")
            if appreciation: breakdown.append(f"appreciation: {appreciation}")
            if interest: breakdown.append(f"interest: {interest}")
            if breakdown:
                reaction_str += f" ({', '.join(breakdown)})"
            engagement_parts.append(reaction_str)
        if comments:
            engagement_parts.append(f"💬 {comments} comments")
        if reposts:
            engagement_parts.append(f"🔄 {reposts} reposts")
        if engagement_parts:
            lines.append(f"*{' | '.join(engagement_parts)}*")
        # Media: embed only the first image/video. The value may be a list of
        # dicts, a single dict, or a bare URL string.
        images = post.get('image', post.get('images', []))
        if images and len(images) > 0:
            img = images[0] if isinstance(images, list) else images
            img_url = img.get('url', img) if isinstance(img, dict) else img
            if img_url:
                lines.append(f"![Post Image]({img_url})")
        videos = post.get('video', post.get('videos', []))
        if videos and len(videos) > 0:
            vid = videos[0] if isinstance(videos, list) else videos
            vid_url = vid.get('url', vid) if isinstance(vid, dict) else vid
            if vid_url:
                lines.append(f"🎥 [Video]({vid_url})")
        # Post URL
        post_url = post.get('postUrl', post.get('post_url', ''))
        if post_url:
            lines.append(f"🔗 [View on LinkedIn]({post_url})")
        lines.append("")
        lines.append("---")
        lines.append("")
    return "\n".join(lines)
def format_profile_to_markdown(data: Dict) -> Tuple[str, str]:
    """Format full profile data as Markdown. Returns (markdown_content, name).

    Section order: header + stats, About, Current Role, Experience,
    Education, Skills, Certifications, Awards, Languages, Projects,
    Volunteer, Publications, Posts, then a fetched-at footer. Empty
    sections are omitted. Key lookups try camelCase (Clado) first, then
    snake_case fallbacks.
    """
    # Handle both direct profile data and nested structure
    profile = data
    # Extract arrays - Clado uses camelCase
    experience = data.get('fullPositions', data.get('positions', data.get('experience', [])))
    education = data.get('educations', data.get('education', []))
    posts = data.get('posts', [])
    certifications = data.get('certifications', [])
    awards = data.get('awards', [])
    # NOTE(review): skills_list is never used — format_skills() re-reads
    # profile['skills'] itself. Candidate for removal.
    skills_list = data.get('skills', [])
    languages = data.get('languages', [])
    projects = data.get('projects', [])
    volunteer = data.get('volunteerExperience', data.get('volunteer_experience', data.get('volunteer', [])))
    publications = data.get('publications', [])
    # Basic info - Clado uses camelCase
    first_name = profile.get('firstName', profile.get('first_name', ''))
    last_name = profile.get('lastName', profile.get('last_name', ''))
    name = f"{first_name} {last_name}".strip() or profile.get('name', 'Unknown')
    headline = profile.get('headline', '')
    summary = profile.get('summary', '')
    username = profile.get('username', '')
    linkedin_url = f"https://www.linkedin.com/in/{username}/" if username else profile.get('linkedin_url', '')
    picture_url = profile.get('profilePicture', profile.get('picture_url', ''))
    # Location from geo object
    geo = profile.get('geo', {})
    location = geo.get('full', geo.get('city', '')) if isinstance(geo, dict) else profile.get('location', '')
    # Network stats (from wrapper level or profile)
    followers = profile.get('follower_count', profile.get('followersCount', profile.get('followers_count', 0)))
    connections = profile.get('connection_count', profile.get('connectionsCount', profile.get('connections_count', 0)))
    # Current role from first position
    current_title = ''
    current_company = ''
    if experience:
        current_title = experience[0].get('title', '')
        current_company = experience[0].get('companyName', '')
    lines = [f"# {name}"]
    if headline:
        lines.append(f"*{headline}*")
    lines.append("")
    # Profile picture (if available)
    if picture_url:
        lines.append(f"![Profile Picture]({picture_url})")
        lines.append("")
    # Quick stats
    stats = []
    if location:
        stats.append(f"📍 {location}")
    if followers:
        stats.append(f"👥 {followers:,} followers")
    if connections:
        stats.append(f"🔗 {connections:,} connections")
    if stats:
        lines.append(" | ".join(stats))
        lines.append("")
    # LinkedIn URL
    if linkedin_url:
        lines.append(f"🔗 [{linkedin_url}]({linkedin_url})")
        lines.append("")
    # Summary/About
    if summary:
        lines.append("## 📋 About")
        lines.append("")
        lines.append(summary.strip())
        lines.append("")
        lines.append("---")
        lines.append("")
    # Current Role
    if current_title:
        lines.append("## 🎯 Current Role")
        lines.append("")
        role_line = f"**{current_title}**"
        if current_company:
            role_line += f" at {current_company}"
        lines.append(role_line)
        lines.append("")
    # Experience
    exp_md = format_experience(experience)
    if exp_md:
        lines.append(exp_md)
    # Education
    edu_md = format_education(education)
    if edu_md:
        lines.append(edu_md)
    # Skills
    skills_md = format_skills(profile)
    if skills_md:
        lines.append(skills_md)
    # Certifications
    cert_md = format_certifications(certifications)
    if cert_md:
        lines.append(cert_md)
    # Awards (rendered inline — no dedicated formatter like the others)
    if awards:
        lines.append("## 🏆 Awards")
        lines.append("")
        for award in awards:
            award_name = award.get('name', award.get('title', 'Award'))
            issuer = award.get('issuer', '')
            line = f"- **{award_name}**"
            if issuer:
                line += f" — {issuer}"
            lines.append(line)
        lines.append("")
    # Languages
    lang_md = format_languages(languages)
    if lang_md:
        lines.append(lang_md)
    # Projects
    proj_md = format_projects(projects)
    if proj_md:
        lines.append(proj_md)
    # Volunteer Experience
    vol_md = format_volunteer(volunteer)
    if vol_md:
        lines.append(vol_md)
    # Publications
    pub_md = format_publications(publications)
    if pub_md:
        lines.append(pub_md)
    # Posts
    posts_md = format_posts(posts)
    if posts_md:
        lines.append(posts_md)
    # Metadata footer
    lines.append("---")
    lines.append(f"*Fetched: {datetime.now().strftime('%Y-%m-%d %H:%M')} via Clado API*")
    return "\n".join(lines), name
def process_single_profile(url: str, index: int, total: int) -> ProfileResult:
    """Process a single LinkedIn profile.

    Pipeline: cache lookup by username -> API fetch -> Markdown render ->
    save under MD_OUTPUT_DIR. Never raises: every failure is captured in
    the returned ProfileResult.error. *index*/*total* are currently unused
    — presumably kept for progress reporting; confirm before removing.
    """
    result = ProfileResult(url=url)
    try:
        # Check cache first
        username = extract_linkedin_username(url)
        if username:
            cached = find_cached_result(username)
            if cached:
                md_path, md_content = cached
                result.md_path = md_path
                result.md_content = md_content
                # Extract name from first line (the "# Name" heading)
                first_line = md_content.split('\n')[0]
                result.name = first_line.replace('# ', '').strip()
                result.success = True
                return result
        # Fetch from API
        data = fetch_profile(url)
        result.profile_data = data
        # Format as Markdown
        md_content, name = format_profile_to_markdown(data)
        result.md_content = md_content
        result.name = name
        # Save to file named <name>_<username>_<timestamp>.md — this shape is
        # what find_cached_result() globs for on later runs.
        MD_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_name = sanitize_filename(name)
        safe_username = sanitize_filename(username or 'unknown')
        md_filename = f"{safe_name}_{safe_username}_{timestamp}.md"
        md_path = MD_OUTPUT_DIR / md_filename
        with open(md_path, 'w', encoding='utf-8') as f:
            f.write(md_content)
        result.md_path = md_path
        result.success = True
    except requests.exceptions.HTTPError as e:
        # raise_for_status() always attaches a response, so e.response is safe here.
        result.error = f"HTTP {e.response.status_code}: {e.response.text[:100]}"
    except requests.exceptions.Timeout:
        result.error = "Request timeout"
    except Exception as e:
        result.error = str(e)
    return result
def main():
    """Main entry point.

    argv[1] carries the raw URL input (supplied by Raycast). Exits 1 with
    the error sound on missing input/API key, no valid URLs, or when every
    profile fails; otherwise copies the combined Markdown of all successful
    profiles to the clipboard and plays the success sound.
    """
    if len(sys.argv) < 2:
        play_sound(success=False)
        sys.exit(1)
    if not CLADO_API_KEY:
        play_sound(success=False)
        sys.exit(1)
    input_text = sys.argv[1].strip()
    urls = parse_urls(input_text)
    if not urls:
        play_sound(success=False)
        sys.exit(1)
    total = len(urls)
    results: List[ProfileResult] = []
    try:
        if total > 1:
            # Parallel processing (bounded by MAX_PARALLEL workers)
            with ThreadPoolExecutor(max_workers=MAX_PARALLEL) as executor:
                futures = {
                    executor.submit(process_single_profile, url, i, total): url
                    for i, url in enumerate(urls, 1)
                }
                for future in as_completed(futures):
                    results.append(future.result())
        else:
            # Single profile
            results.append(process_single_profile(urls[0], 1, 1))
        # Sort by original order — as_completed() yields in completion order.
        # Note: duplicate input URLs collapse to one entry here.
        results_by_url = {r.url: r for r in results}
        results = [results_by_url[url] for url in urls if url in results_by_url]
        # Build clipboard content
        successful = [r for r in results if r.success]
        if successful:
            clipboard_parts = []
            for r in successful:
                # md_content already has the title, so just use it directly
                clipboard_parts.append(r.md_content)
            combined = "\n\n---\n\n".join(clipboard_parts)
            copy_to_clipboard(combined)
            play_sound(success=True)
        else:
            play_sound(success=False)
            sys.exit(1)
    except Exception:
        # Top-level boundary: a silent Raycast script should fail with a sound
        # and exit code, not a traceback. NOTE(review): this also hides
        # unexpected bugs — consider logging the exception before exiting.
        play_sound(success=False)
        sys.exit(1)
# Script entry point — Raycast executes this file directly.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment