Skip to content

Instantly share code, notes, and snippets.

@yigitkonur
Created December 8, 2025 21:42
Show Gist options
  • Select an option

  • Save yigitkonur/c2254bf9fbfc99c6066b788b7920a9cd to your computer and use it in GitHub Desktop.

Select an option

Save yigitkonur/c2254bf9fbfc99c6066b788b7920a9cd to your computer and use it in GitHub Desktop.
Raycast script: Parse URLs with Jina AI - reads URLs from clipboard, processes in parallel, copies formatted markdown content
#!/usr/bin/env python3
# Required parameters:
# @raycast.schemaVersion 1
# @raycast.title Parse URLs with Jina AI
# @raycast.mode silent
# Optional parameters:
# @raycast.icon 🌐
# @raycast.packageName AI Tools
# @raycast.needsConfirmation false
# Documentation:
# @raycast.author AI Assistant
# @raycast.description Reads URLs from clipboard, processes them with Jina AI in parallel, and copies formatted content
import asyncio
import httpx
import json
import os
import re
import pyperclip
import subprocess
import sys
import traceback
from pathlib import Path
from typing import List, Dict, Tuple, Optional
from urllib.parse import urlparse
# Paths used to find the optional .env file stored alongside this script.
SCRIPT_DIR = Path(__file__).parent
ENV_FILE = SCRIPT_DIR / ".env"
def load_env(env_file: Optional[Path] = None) -> None:
    """Load KEY=VALUE pairs from a .env file into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Variables that already exist in the environment are never overwritten.

    Args:
        env_file: File to read. Defaults to the module-level ``ENV_FILE``.
    """
    path = ENV_FILE if env_file is None else Path(env_file)
    if not path.is_file():
        return

    # utf-8-sig: decode as UTF-8 regardless of locale and drop a leading BOM,
    # which would otherwise become part of the first key.
    with open(path, encoding="utf-8-sig") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue

            key, _, value = line.partition("=")
            key = key.strip()
            # Accept shell-style "export KEY=value" lines.
            if key.startswith("export "):
                key = key[len("export "):].strip()

            value = value.strip()
            # Drop one pair of matching surrounding quotes so that
            # KEY="abc" yields abc rather than "abc".
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]

            if key and key not in os.environ:
                os.environ[key] = value
# Load .env values now so the configuration lookups below can see them.
load_env()
# Configuration
JINA_API_KEY = os.environ.get("JINA_API_KEY", "")  # "" when the variable is unset
JINA_API_URL = "https://r.jina.ai/"  # endpoint that fetch_url_content POSTs to
SOUND_FILE = "/System/Library/Sounds/Pop.aiff"  # file handed to afplay by play_sound
def get_clipboard_content() -> str:
    """Return the clipboard text, trying pyperclip first and ``pbpaste`` second.

    Returns:
        The clipboard contents as a string.

    Raises:
        SystemExit: With exit code 1 when neither method can read the
            clipboard.
    """
    try:
        return pyperclip.paste()
    except Exception:
        # Any pyperclip failure falls through to the command-line fallback.
        # Deliberately not a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        pass

    try:
        result = subprocess.run(
            ["pbpaste"], capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError when pbpaste is not installed.
        print("❌ Could not access clipboard")
        sys.exit(1)
    return result.stdout
def set_clipboard_content(text: str) -> bool:
    """Copy *text* to the clipboard, trying pyperclip first and ``pbcopy`` second.

    Args:
        text: The string to place on the clipboard.

    Returns:
        True when one of the two methods succeeded, False otherwise.
    """
    try:
        pyperclip.copy(text)
        return True
    except Exception:
        # Any pyperclip failure falls through to the command-line fallback.
        # Deliberately not a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        pass

    try:
        subprocess.run(["pbcopy"], input=text, text=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError when pbcopy is not installed.
        return False
    return True
def play_sound() -> None:
    """Play the notification sound through ``afplay``; failures are ignored."""
    player_command = ["afplay", SOUND_FILE]
    ignorable_errors = (subprocess.CalledProcessError, FileNotFoundError)
    try:
        subprocess.run(player_command, capture_output=True, check=True)
    except ignorable_errors:
        # Best effort only: a missing sound file or a missing afplay binary
        # must not affect the rest of the script.
        return
# Compiled once at import time rather than on every is_valid_url() call.
_URL_PATTERN = re.compile(
    r"^https?://"  # http:// or https://
    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+"  # dot-terminated labels...
    r"(?:[A-Z]{2,63}|XN--[A-Z0-9-]{1,59})\.?|"  # ...then an alphabetic or punycode TLD
    r"localhost|"  # localhost...
    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
    r"(?::\d+)?"  # optional port
    r"(?:/?|[/?]\S+)$",  # optional path/query without whitespace
    re.IGNORECASE,
)


def is_valid_url(url: str) -> bool:
    """Check whether a string is a single, complete http(s) URL.

    The host may be a domain name, ``localhost`` or a dotted IPv4 address.
    TLDs of up to 63 letters and punycode (``xn--``) TLDs are accepted, so
    long gTLDs such as ``.technology`` are treated as valid.

    Args:
        url: Candidate string; surrounding whitespace is ignored.

    Returns:
        True when the stripped string matches the URL pattern.
    """
    candidate = url.strip()
    if not candidate:
        return False
    return _URL_PATTERN.match(candidate) is not None
def extract_urls_from_text(text: str) -> List[str]:
    """Return every line of *text* that is, on its own, a valid URL.

    Lines are stripped before validation; the original order is kept.
    """
    stripped_lines = (raw.strip() for raw in text.strip().split("\n"))
    return [
        candidate
        for candidate in stripped_lines
        if candidate and is_valid_url(candidate)
    ]
def _extract_error_message(response) -> str:
    """Build a readable error message from a non-200 API response."""
    fallback = f"HTTP {response.status_code}"
    try:
        body = response.json()
    except ValueError:
        # Body is not JSON: include the start of the raw text instead.
        return f"{fallback} - {response.text[:100]}"
    if not isinstance(body, dict):
        return f"{fallback} - {response.text[:100]}"

    nested = body.get("error")
    if isinstance(nested, dict) and nested.get("message"):
        return str(nested["message"])
    # NOTE(review): Jina error bodies appear to carry the message at the top
    # level ("readableMessage" / "message") -- confirm against the API docs.
    for key in ("readableMessage", "message"):
        if body.get(key):
            return str(body[key])
    return fallback


async def fetch_url_content(
    client: httpx.AsyncClient, url: str, timeout: float = 30.0
) -> Tuple[str, int, Dict]:
    """Fetch the content of a single URL through the Jina AI reader API.

    This coroutine never raises: every failure is reported in the returned
    tuple so that one bad URL cannot abort a batch of concurrent requests.

    Args:
        client: Shared HTTP client used to send the request.
        url: The page to fetch.
        timeout: Per-request timeout in seconds.

    Returns:
        ``(url, status, content)``. On success ``status`` is 200 and
        ``content`` is the response's ``data`` object. On failure ``content``
        is ``{"error": message}`` and ``status`` is the HTTP status code,
        408 for a timeout, or 500 for any other request error.
    """
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {JINA_API_KEY}",
        "Content-Type": "application/json",
        "X-Retain-Images": "none",
        "X-Return-Format": "markdown",
        "X-With-Links-Summary": "true",
    }
    payload = {"url": url}
    try:
        response = await client.post(
            JINA_API_URL, headers=headers, json=payload, timeout=timeout
        )
        if response.status_code != 200:
            error_msg = _extract_error_message(response)
            return url, response.status_code, {"error": error_msg}

        data = response.json()
        content = data.get("data") if isinstance(data, dict) else None
        # A missing or null "data" field becomes an empty dict so the caller
        # always receives a mapping.
        return url, 200, content if isinstance(content, dict) else {}
    except httpx.TimeoutException:
        return url, 408, {"error": f"Request timeout ({timeout:g}s)"}
    except httpx.RequestError as e:
        return url, 500, {"error": f"Request failed: {e}"}
    except Exception as e:
        # Top-level boundary for this request (e.g. invalid JSON on a 200).
        return url, 500, {"error": f"Unexpected error: {e}"}
def _collapse_blank_lines(text: str) -> str:
    """Strip every line and squeeze runs of blank lines down to one."""
    cleaned = []
    previous_blank = False
    for line in (raw.strip() for raw in text.split("\n")):
        if line or not previous_blank:
            cleaned.append(line)
        previous_blank = not line
    return "\n".join(cleaned)


def format_result(url: str, status_code: int, content: Dict) -> str:
    """Format the result for one URL as a markdown section.

    Args:
        url: The URL that was fetched.
        status_code: Status reported for the fetch; 200 means success.
        content: On success, a mapping that may hold ``title``,
            ``description``, ``content`` and ``links``; on failure, a mapping
            with an ``error`` message. Missing or ``None`` values are treated
            as empty, and a non-dict ``content`` as an empty mapping.

    Returns:
        The section text: a header naming the URL, then either the page
        content or a failure notice, terminated by a ``---`` separator.
    """
    if not isinstance(content, dict):
        content = {}

    parts = [
        "\n========================================\n",
        f"# URL: {url}\n",
        "========================================\n\n",
    ]

    if status_code == 200 and "error" not in content:
        # ``or ""`` also covers keys that are present but null.
        title = content.get("title") or ""
        description = content.get("description") or ""
        text_content = content.get("content") or ""

        if title:
            parts.append(f"## {title}\n\n")
        # Skip the description when it merely repeats the first content line.
        if description and description != text_content.split("\n", 1)[0]:
            parts.append(f"{description}\n\n")
        if text_content:
            parts.append(_collapse_blank_lines(text_content))

        # Add links summary if available
        links = content.get("links")
        if isinstance(links, dict) and links:
            parts.append("\n\n**Links:**\n")
            parts.extend(
                f"- [{text}]({link_url})\n" for text, link_url in links.items()
            )
    else:
        error_msg = content.get("error", "Unknown error")
        parts.append(f"**Failed URL - Status Code: {status_code}**\n\n")
        parts.append(f"Error: {error_msg}\n")

    parts.append("\n---")
    return "".join(parts)
async def main():
    """Read URLs from the clipboard, fetch them, and copy the formatted result.

    Steps: verify the API key, read the clipboard, extract one URL per line,
    fetch all URLs concurrently, join the formatted sections, write them back
    to the clipboard and play a sound. Every failure path prints a message
    and exits with status 1.

    Raises:
        SystemExit: With exit code 1 on any failure.
    """
    try:
        # Check for API key
        if not JINA_API_KEY:
            print("❌ JINA_API_KEY not set")
            print("💡 Add to .env file: JINA_API_KEY=your_key_here")
            sys.exit(1)

        # Get clipboard content
        clipboard_text = get_clipboard_content()
        if not clipboard_text.strip():
            print("❌ Clipboard is empty")
            sys.exit(1)

        # Extract URLs from clipboard
        urls = extract_urls_from_text(clipboard_text)
        if not urls:
            print("❌ No valid URLs found in clipboard")
            print(
                "💡 Please ensure URLs are separated by new lines and start with http:// or https://"
            )
            sys.exit(1)

        print(f"🔄 Processing {len(urls)} URL(s)...")

        # Process URLs in parallel
        async with httpx.AsyncClient() as client:
            results = await asyncio.gather(
                *(fetch_url_content(client, url) for url in urls)
            )

        # asyncio.gather returns results in the order of its arguments, so
        # they already line up with ``urls``; no lookup table is needed and
        # duplicate URLs each keep their own result.
        final_output = "".join(
            format_result(url, status_code, content)
            for url, status_code, content in results
        )

        # Remove trailing newline and dashes
        final_output = final_output.rstrip()
        if final_output.endswith("---"):
            final_output = final_output[:-3].rstrip()

        # Copy to clipboard
        if set_clipboard_content(final_output):
            print(f"✅ Successfully processed {len(urls)} URL(s)")
            print("📋 Formatted content copied to clipboard")
        else:
            print("❌ Failed to copy to clipboard")
            # Print the result so the work is not lost.
            print(final_output)
            sys.exit(1)

        # Play success sound
        play_sound()
    except ImportError as e:
        # Only reached for imports that happen lazily at run time; a missing
        # top-level module fails before main() is ever called.
        print(f"❌ Missing dependency: {e}")
        print("💡 Install required packages with:")
        print(" uv pip install --system httpx pyperclip")
        print(" Or: uv pip install --python=$(which python3) httpx pyperclip")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report, dump the traceback, exit non-zero.
        print(f"❌ Unexpected error: {e}")
        print("🔧 Debug info:")
        traceback.print_exc()
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: build the main coroutine and run it to completion.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment