yigitkonur · December 8, 2025 21:42
diff --git a/urls-to-md.py b/urls-to-md.py
 #!/usr/bin/env python3

 # Required parameters:
 # @raycast.schemaVersion 1
 # @raycast.title Parse URLs with Jina AI
 # @raycast.mode silent

 # Optional parameters:
 # @raycast.icon 🌐
 # @raycast.packageName AI Tools
 # @raycast.needsConfirmation false

 # Documentation:
 # @raycast.author AI Assistant
 # @raycast.description Reads URLs from clipboard, processes them with Jina AI in parallel, and copies formatted content

 import asyncio
 import httpx
 import json
 import os
 import re
 import pyperclip
 import subprocess
 import sys
 import traceback
 from pathlib import Path
 from typing import List, Dict, Tuple, Optional
 from urllib.parse import urlparse

 # Optional .env file that lives next to this script.
 SCRIPT_DIR = Path(__file__).parent
 ENV_FILE = SCRIPT_DIR / ".env"


 def load_env():
    """Copy ``KEY=VALUE`` pairs from ``ENV_FILE`` into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A variable that is already set in the process environment is never
    overwritten. One pair of matching surrounding quotes is removed from a
    value, so ``KEY="abc"`` stores ``abc`` rather than ``"abc"``.
    Does nothing when the file does not exist.
    """
    if not ENV_FILE.exists():
        return

    # Explicit encoding so parsing does not depend on the launcher's locale.
    with open(ENV_FILE, encoding="utf-8") as env_file:
        for raw_line in env_file:
            entry = raw_line.strip()
            if not entry or entry.startswith("#") or "=" not in entry:
                continue

            key, value = entry.split("=", 1)
            key = key.strip()
            value = value.strip()

            # Quotes delimit the value in .env files; they are not part of it.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]

            if key and key not in os.environ:
                os.environ[key] = value


 load_env()

 # Runtime settings: success sound, Jina AI endpoint, and the API key taken
 # from the environment (empty string when the variable is not set).
 SOUND_FILE = "/System/Library/Sounds/Pop.aiff"
 JINA_API_URL = "https://r.jina.ai/"
 JINA_API_KEY = os.environ.get("JINA_API_KEY", "")


 def get_clipboard_content() -> str:
    """Return the current clipboard text.

    Tries ``pyperclip`` first and falls back to the ``pbpaste`` command.
    If both fail, prints an error message and exits with status 1.
    """
    try:
        return pyperclip.paste()
    except Exception:
        # Not a bare ``except``: Ctrl-C and SystemExit must still propagate.
        # Any pyperclip failure falls through to the pbpaste fallback.
        pass

    try:
        result = subprocess.run(
            ["pbpaste"], capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError also covers a missing ``pbpaste`` binary (FileNotFoundError).
        print("❌ Could not access clipboard")
        sys.exit(1)
    return result.stdout


 def set_clipboard_content(text: str) -> bool:
    """Put ``text`` on the clipboard.

    Tries ``pyperclip`` first and falls back to the ``pbcopy`` command.

    Returns:
        True when either method succeeded, False when both failed.
    """
    try:
        pyperclip.copy(text)
        return True
    except Exception:
        # Not a bare ``except``: Ctrl-C and SystemExit must still propagate.
        # Any pyperclip failure falls through to the pbcopy fallback.
        pass

    try:
        subprocess.run(["pbcopy"], input=text, text=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError also covers a missing ``pbcopy`` binary (FileNotFoundError).
        return False
    return True


 def play_sound() -> None:
    """Play ``SOUND_FILE`` through ``afplay``; failures are ignored."""
    command = ["afplay", SOUND_FILE]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except FileNotFoundError:
        # ``afplay`` is not installed on this machine.
        return
    except subprocess.CalledProcessError:
        # ``afplay`` ran but exited non-zero (for example, no such sound file).
        return


 # Built once at import time rather than on every call.
 _URL_PATTERN = re.compile(
    r"^https?://"  # http:// or https://
    # Domain: dot-separated labels ending in an alphabetic TLD. The TLD may be
    # up to 63 letters (the DNS label limit) so hosts such as
    # example.technology are accepted.
    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|"
    r"localhost|"  # localhost...
    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
    r"(?::\d+)?"  # optional port
    r"(?:/?|[/?]\S+)$",  # nothing, "/", or a path/query without whitespace
    re.IGNORECASE,
 )


 def is_valid_url(url: str) -> bool:
    """Return True when ``url`` looks like an http(s) URL.

    Surrounding whitespace is ignored. The host must be a domain name,
    ``localhost`` or a dotted-quad IP, optionally followed by a port and a
    path/query that contains no whitespace.
    """
    candidate = url.strip()
    if not candidate:
        return False
    return _URL_PATTERN.match(candidate) is not None


 def extract_urls_from_text(text: str) -> List[str]:
    """Return the lines of ``text`` that pass ``is_valid_url``, in order.

    Each line is stripped of surrounding whitespace first; empty lines and
    lines that are not valid URLs are dropped.
    """
    stripped_lines = (raw.strip() for raw in text.strip().split("\n"))
    return [
        candidate
        for candidate in stripped_lines
        if candidate and is_valid_url(candidate)
    ]


 async def fetch_url_content(
    client: httpx.AsyncClient, url: str
 ) -> Tuple[str, int, Dict]:
    """Fetch one URL through the Jina AI endpoint.

    Args:
        client: Shared async HTTP client used to send the request.
        url: The page to convert.

    Returns:
        ``(url, status_code, payload)``. On HTTP 200 ``payload`` is the
        ``"data"`` object of the JSON response (an empty dict when it is
        missing or not an object). Otherwise it is ``{"error": message}``.
        Timeouts are reported as 408 and every other failure as 500; this
        coroutine does not raise for request errors.
    """
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {JINA_API_KEY}",
        "Content-Type": "application/json",
        "X-Retain-Images": "none",
        "X-Return-Format": "markdown",
        "X-With-Links-Summary": "true",
    }

    try:
        response = await client.post(
            JINA_API_URL, headers=headers, json={"url": url}, timeout=30.0
        )

        if response.status_code == 200:
            body = response.json()
            # A null or non-object "data" would break the dict lookups that
            # the formatter performs on the payload.
            content = body.get("data") if isinstance(body, dict) else None
            return url, 200, content if isinstance(content, dict) else {}

        try:
            error_msg = response.json().get("error", {}).get(
                "message", f"HTTP {response.status_code}"
            )
        except Exception:
            # Body is not JSON or has an unexpected shape: keep the real HTTP
            # status and show the start of the raw body instead.
            error_msg = f"HTTP {response.status_code} - {response.text[:100]}"
        return url, response.status_code, {"error": error_msg}

    except httpx.TimeoutException:
        return url, 408, {"error": "Request timeout (30s)"}
    except httpx.RequestError as e:
        return url, 500, {"error": f"Request failed: {str(e)}"}
    except Exception as e:
        return url, 500, {"error": f"Unexpected error: {str(e)}"}


 def format_result(url: str, status_code: int, content: Dict) -> str:
    """Render one fetch result as a Markdown section.

    The section starts with a ruled ``# URL:`` header and ends with a
    ``---`` separator. A 200 result without an ``"error"`` key shows the
    title, the description (unless it equals the first content line), the
    content with every line stripped and runs of blank lines collapsed to
    one, and a list of links. Any other result shows the status code and
    the error message.
    """
    rule = "========================================"
    pieces = [f"\n{rule}\n# URL: {url}\n{rule}\n\n"]

    if status_code != 200 or "error" in content:
        reason = content.get("error", "Unknown error")
        pieces.append(f"**Failed URL - Status Code: {status_code}**\n\n")
        pieces.append(f"Error: {reason}\n")
    else:
        title = content.get("title", "")
        summary = content.get("description", "")
        body_text = content.get("content", "")

        if title:
            pieces.append(f"## {title}\n\n")

        if summary and summary != body_text.split("\n")[0]:
            pieces.append(f"{summary}\n\n")

        if body_text:
            compact = []
            for stripped in map(str.strip, body_text.split("\n")):
                # Keep every text line; keep a blank line only when the
                # previously kept line was not blank.
                if stripped or not compact or compact[-1] != "":
                    compact.append(stripped)
            pieces.append("\n".join(compact))

        link_map = content.get("links", {})
        if link_map:
            pieces.append("\n\n**Links:**\n")
            pieces.extend(
                f"- [{label}]({target})\n" for label, target in link_map.items()
            )

    pieces.append("\n---")
    return "".join(pieces)


 async def main():
    """Convert the URLs on the clipboard to Markdown and copy the result back.

    Steps: check that an API key is configured, read the clipboard, keep the
    lines that are valid URLs, fetch all of them concurrently, format each
    result, put the combined text on the clipboard and play a sound.
    Every failure path prints a message and exits with status 1.
    """
    try:
        if not JINA_API_KEY:
            print("❌ JINA_API_KEY not set")
            print("💡 Add to .env file: JINA_API_KEY=your_key_here")
            sys.exit(1)

        clipboard_text = get_clipboard_content()

        if not clipboard_text.strip():
            print("❌ Clipboard is empty")
            sys.exit(1)

        urls = extract_urls_from_text(clipboard_text)

        if not urls:
            print("❌ No valid URLs found in clipboard")
            print(
                "💡 Please ensure URLs are separated by new lines and start with http:// or https://"
            )
            sys.exit(1)

        print(f"🔄 Processing {len(urls)} URL(s)...")

        async with httpx.AsyncClient() as client:
            # gather() returns results in the order the awaitables were
            # passed, so they already line up with ``urls``; no re-sorting
            # through a lookup table is needed.
            results = await asyncio.gather(
                *(fetch_url_content(client, url) for url in urls)
            )

        final_output = "".join(
            format_result(url, status_code, content)
            for url, status_code, content in results
        )

        # Each section ends with "\n---"; drop the separator after the last.
        final_output = final_output.rstrip()
        if final_output.endswith("---"):
            final_output = final_output[:-3].rstrip()

        if set_clipboard_content(final_output):
            print(f"✅ Successfully processed {len(urls)} URL(s)")
            print("📋 Formatted content copied to clipboard")
        else:
            # Still show the result so the work is not lost.
            print("❌ Failed to copy to clipboard")
            print(final_output)
            sys.exit(1)

        play_sound()

    except ImportError as e:
        print(f"❌ Missing dependency: {e}")
        print("💡 Install required packages with:")
        print("   uv pip install --system httpx pyperclip")
        print("   Or: uv pip install --python=$(which python3) httpx pyperclip")
        sys.exit(1)

    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above are not intercepted here.
        print(f"❌ Unexpected error: {e}")
        print("🔧 Debug info:")
        traceback.print_exc()
        sys.exit(1)


 if __name__ == "__main__":
    # Script entry point: run the async main() coroutine to completion.
    # Skipped when the file is imported as a module.
    asyncio.run(main())
	#!/usr/bin/env python3

	# Required parameters:
	# @raycast.schemaVersion 1
	# @raycast.title Parse URLs with Jina AI
	# @raycast.mode silent

	# Optional parameters:
	# @raycast.icon 🌐
	# @raycast.packageName AI Tools
	# @raycast.needsConfirmation false

	# Documentation:
	# @raycast.author AI Assistant
	# @raycast.description Reads URLs from clipboard, processes them with Jina AI in parallel, and copies formatted content

	import asyncio
	import httpx
	import json
	import os
	import re
	import pyperclip
	import subprocess
	import sys
	import traceback
	from pathlib import Path
	from typing import List, Dict, Tuple, Optional
	from urllib.parse import urlparse

	# Optional .env file that lives next to this script.
	SCRIPT_DIR = Path(__file__).parent
	ENV_FILE = SCRIPT_DIR / ".env"


	def load_env():
	    """Copy ``KEY=VALUE`` pairs from ``ENV_FILE`` into ``os.environ``.

	    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
	    A variable that is already set in the process environment is never
	    overwritten. One pair of matching surrounding quotes is removed from a
	    value, so ``KEY="abc"`` stores ``abc`` rather than ``"abc"``.
	    Does nothing when the file does not exist.
	    """
	    if not ENV_FILE.exists():
	        return

	    # Explicit encoding so parsing does not depend on the launcher's locale.
	    with open(ENV_FILE, encoding="utf-8") as env_file:
	        for raw_line in env_file:
	            entry = raw_line.strip()
	            if not entry or entry.startswith("#") or "=" not in entry:
	                continue

	            key, value = entry.split("=", 1)
	            key = key.strip()
	            value = value.strip()

	            # Quotes delimit the value in .env files; they are not part of it.
	            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
	                value = value[1:-1]

	            if key and key not in os.environ:
	                os.environ[key] = value


	load_env()

	# Runtime settings: success sound, Jina AI endpoint, and the API key taken
	# from the environment (empty string when the variable is not set).
	SOUND_FILE = "/System/Library/Sounds/Pop.aiff"
	JINA_API_URL = "https://r.jina.ai/"
	JINA_API_KEY = os.environ.get("JINA_API_KEY", "")


	def get_clipboard_content() -> str:
	    """Return the current clipboard text.

	    Tries ``pyperclip`` first and falls back to the ``pbpaste`` command.
	    If both fail, prints an error message and exits with status 1.
	    """
	    try:
	        return pyperclip.paste()
	    except Exception:
	        # Not a bare ``except``: Ctrl-C and SystemExit must still propagate.
	        # Any pyperclip failure falls through to the pbpaste fallback.
	        pass

	    try:
	        result = subprocess.run(
	            ["pbpaste"], capture_output=True, text=True, check=True
	        )
	    except (subprocess.CalledProcessError, OSError):
	        # OSError also covers a missing ``pbpaste`` binary (FileNotFoundError).
	        print("❌ Could not access clipboard")
	        sys.exit(1)
	    return result.stdout


	def set_clipboard_content(text: str) -> bool:
	    """Put ``text`` on the clipboard.

	    Tries ``pyperclip`` first and falls back to the ``pbcopy`` command.

	    Returns:
	        True when either method succeeded, False when both failed.
	    """
	    try:
	        pyperclip.copy(text)
	        return True
	    except Exception:
	        # Not a bare ``except``: Ctrl-C and SystemExit must still propagate.
	        # Any pyperclip failure falls through to the pbcopy fallback.
	        pass

	    try:
	        subprocess.run(["pbcopy"], input=text, text=True, check=True)
	    except (subprocess.CalledProcessError, OSError):
	        # OSError also covers a missing ``pbcopy`` binary (FileNotFoundError).
	        return False
	    return True


	def play_sound() -> None:
	    """Play ``SOUND_FILE`` through ``afplay``; failures are ignored."""
	    try:
	        subprocess.run(["afplay", SOUND_FILE], capture_output=True, check=True)
	    except (subprocess.CalledProcessError, FileNotFoundError):
	        # Best effort: the sound file may not exist or afplay may be absent.
	        pass


	# Built once at import time rather than on every call.
	_URL_PATTERN = re.compile(
	    r"^https?://"  # http:// or https://
	    # Domain: dot-separated labels ending in an alphabetic TLD. The TLD may be
	    # up to 63 letters (the DNS label limit) so hosts such as
	    # example.technology are accepted. The "|" separators must be unescaped:
	    # written as "\|" they match a literal pipe and no real URL passes.
	    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|"
	    r"localhost|"  # localhost...
	    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
	    r"(?::\d+)?"  # optional port
	    r"(?:/?|[/?]\S+)$",  # nothing, "/", or a path/query without whitespace
	    re.IGNORECASE,
	)


	def is_valid_url(url: str) -> bool:
	    """Return True when ``url`` looks like an http(s) URL.

	    Surrounding whitespace is ignored. The host must be a domain name,
	    ``localhost`` or a dotted-quad IP, optionally followed by a port and a
	    path/query that contains no whitespace.
	    """
	    candidate = url.strip()
	    if not candidate:
	        return False
	    return _URL_PATTERN.match(candidate) is not None


	def extract_urls_from_text(text: str) -> List[str]:
	    """Return the lines of ``text`` that pass ``is_valid_url``, in order.

	    Each line is stripped of surrounding whitespace first; empty lines and
	    lines that are not valid URLs are dropped.
	    """
	    urls = []
	    for line in text.strip().split("\n"):
	        line = line.strip()
	        if line and is_valid_url(line):
	            urls.append(line)
	    return urls


	async def fetch_url_content(
	    client: httpx.AsyncClient, url: str
	) -> Tuple[str, int, Dict]:
	    """Fetch one URL through the Jina AI endpoint.

	    Args:
	        client: Shared async HTTP client used to send the request.
	        url: The page to convert.

	    Returns:
	        ``(url, status_code, payload)``. On HTTP 200 ``payload`` is the
	        ``"data"`` object of the JSON response (an empty dict when it is
	        missing or not an object). Otherwise it is ``{"error": message}``.
	        Timeouts are reported as 408 and every other failure as 500; this
	        coroutine does not raise for request errors.
	    """
	    headers = {
	        "Accept": "application/json",
	        "Authorization": f"Bearer {JINA_API_KEY}",
	        "Content-Type": "application/json",
	        "X-Retain-Images": "none",
	        "X-Return-Format": "markdown",
	        "X-With-Links-Summary": "true",
	    }

	    try:
	        response = await client.post(
	            JINA_API_URL, headers=headers, json={"url": url}, timeout=30.0
	        )

	        if response.status_code == 200:
	            body = response.json()
	            # A null or non-object "data" would break the dict lookups that
	            # the formatter performs on the payload.
	            content = body.get("data") if isinstance(body, dict) else None
	            return url, 200, content if isinstance(content, dict) else {}

	        try:
	            error_msg = response.json().get("error", {}).get(
	                "message", f"HTTP {response.status_code}"
	            )
	        except Exception:
	            # Body is not JSON or has an unexpected shape: keep the real HTTP
	            # status and show the start of the raw body instead.
	            error_msg = f"HTTP {response.status_code} - {response.text[:100]}"
	        return url, response.status_code, {"error": error_msg}

	    except httpx.TimeoutException:
	        return url, 408, {"error": "Request timeout (30s)"}
	    except httpx.RequestError as e:
	        return url, 500, {"error": f"Request failed: {str(e)}"}
	    except Exception as e:
	        return url, 500, {"error": f"Unexpected error: {str(e)}"}


	def format_result(url: str, status_code: int, content: Dict) -> str:
	    """Render one fetch result as a text section.

	    The section starts with a ruled ``# URL:`` header and ends with a
	    ``---`` separator. A 200 result without an ``"error"`` key shows the
	    title, the description (unless it equals the first content line), the
	    content with every line stripped and runs of blank lines collapsed to
	    one, and a list of links. Any other result shows the status code and
	    the error message.
	    """
	    rule = "========================================"
	    pieces = [f"\n{rule}\n# URL: {url}\n{rule}\n\n"]

	    if status_code == 200 and "error" not in content:
	        title = content.get("title", "")
	        description = content.get("description", "")
	        text_content = content.get("content", "")

	        if title:
	            pieces.append(f"## {title}\n\n")

	        if description and description != text_content.split("\n")[0]:
	            pieces.append(f"{description}\n\n")

	        if text_content:
	            cleaned_lines = []
	            prev_line_empty = False
	            for line in text_content.split("\n"):
	                line = line.strip()
	                if line:
	                    cleaned_lines.append(line)
	                    prev_line_empty = False
	                elif not prev_line_empty:
	                    # Keep only the first blank line of a run.
	                    cleaned_lines.append("")
	                    prev_line_empty = True
	            pieces.append("\n".join(cleaned_lines))

	        links = content.get("links", {})
	        if links:
	            pieces.append("\n\nLinks:\n")
	            pieces.extend(
	                f"- [{text}]({link_url})\n" for text, link_url in links.items()
	            )
	    else:
	        error_msg = content.get("error", "Unknown error")
	        pieces.append(f"Failed URL - Status Code: {status_code}\n\n")
	        pieces.append(f"Error: {error_msg}\n")

	    pieces.append("\n---")
	    return "".join(pieces)


	async def main():
	    """Convert the URLs on the clipboard to Markdown and copy the result back.

	    Steps: check that an API key is configured, read the clipboard, keep the
	    lines that are valid URLs, fetch all of them concurrently, format each
	    result, put the combined text on the clipboard and play a sound.
	    Every failure path prints a message and exits with status 1.
	    """
	    try:
	        if not JINA_API_KEY:
	            print("❌ JINA_API_KEY not set")
	            print("💡 Add to .env file: JINA_API_KEY=your_key_here")
	            sys.exit(1)

	        clipboard_text = get_clipboard_content()

	        if not clipboard_text.strip():
	            print("❌ Clipboard is empty")
	            sys.exit(1)

	        urls = extract_urls_from_text(clipboard_text)

	        if not urls:
	            print("❌ No valid URLs found in clipboard")
	            print(
	                "💡 Please ensure URLs are separated by new lines and start with http:// or https://"
	            )
	            sys.exit(1)

	        print(f"🔄 Processing {len(urls)} URL(s)...")

	        async with httpx.AsyncClient() as client:
	            # gather() returns results in the order the awaitables were
	            # passed, so they already line up with ``urls``; no re-sorting
	            # through a lookup table is needed.
	            results = await asyncio.gather(
	                *(fetch_url_content(client, url) for url in urls)
	            )

	        final_output = "".join(
	            format_result(url, status_code, content)
	            for url, status_code, content in results
	        )

	        # Each section ends with "\n---"; drop the separator after the last.
	        final_output = final_output.rstrip()
	        if final_output.endswith("---"):
	            final_output = final_output[:-3].rstrip()

	        if set_clipboard_content(final_output):
	            print(f"✅ Successfully processed {len(urls)} URL(s)")
	            print("📋 Formatted content copied to clipboard")
	        else:
	            # Still show the result so the work is not lost.
	            print("❌ Failed to copy to clipboard")
	            print(final_output)
	            sys.exit(1)

	        play_sound()

	    except ImportError as e:
	        print(f"❌ Missing dependency: {e}")
	        print("💡 Install required packages with:")
	        print(" uv pip install --system httpx pyperclip")
	        print(" Or: uv pip install --python=$(which python3) httpx pyperclip")
	        sys.exit(1)

	    except Exception as e:
	        # sys.exit() raises SystemExit, which is not an Exception subclass,
	        # so the exits above are not intercepted here.
	        print(f"❌ Unexpected error: {e}")
	        print("🔧 Debug info:")
	        traceback.print_exc()
	        sys.exit(1)


	if __name__ == "__main__":
	    # Script entry point: run the async main() coroutine to completion.
	    # Skipped when the file is imported as a module.
	    asyncio.run(main())
No results found