Created
December 8, 2025 21:42
-
-
Save yigitkonur/c2254bf9fbfc99c6066b788b7920a9cd to your computer and use it in GitHub Desktop.
Raycast script: Parse URLs with Jina AI - reads URLs from clipboard, processes in parallel, copies formatted markdown content
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Required parameters: | |
| # @raycast.schemaVersion 1 | |
| # @raycast.title Parse URLs with Jina AI | |
| # @raycast.mode silent | |
| # Optional parameters: | |
| # @raycast.icon π | |
| # @raycast.packageName AI Tools | |
| # @raycast.needsConfirmation false | |
| # Documentation: | |
| # @raycast.author AI Assistant | |
| # @raycast.description Reads URLs from clipboard, processes them with Jina AI in parallel, and copies formatted content | |
| import asyncio | |
| import httpx | |
| import json | |
| import os | |
| import re | |
| import pyperclip | |
| import subprocess | |
| import sys | |
| import traceback | |
| from pathlib import Path | |
| from typing import List, Dict, Tuple, Optional | |
| from urllib.parse import urlparse | |
# Locate the optional .env file that lives next to this script
SCRIPT_DIR = Path(__file__).parent
ENV_FILE = SCRIPT_DIR.joinpath(".env")
def load_env(env_file: Optional[Path] = None) -> None:
    """Load KEY=VALUE pairs from a .env file into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A leading ``export `` on the key and one pair of matching surrounding
    quotes on the value are removed. Variables that already exist in the
    process environment are never overwritten.

    Args:
        env_file: Path of the file to read. Defaults to the module-level
            ``ENV_FILE`` when omitted. A missing file is silently ignored.
    """
    path = ENV_FILE if env_file is None else Path(env_file)
    if not path.exists():
        return

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue

            key, value = line.split("=", 1)
            key = key.strip()
            # Accept shell-style "export KEY=value" lines.
            if key.startswith("export "):
                key = key[len("export "):].strip()

            value = value.strip()
            # Drop one pair of matching quotes: KEY="value" -> value
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]

            if key and key not in os.environ:
                os.environ[key] = value
# Populate os.environ from .env before any setting below is read
load_env()

# Configuration
JINA_API_KEY = os.getenv("JINA_API_KEY", "")
JINA_API_URL = "https://r.jina.ai/"
SOUND_FILE = "/System/Library/Sounds/Pop.aiff"
def get_clipboard_content() -> str:
    """Return the clipboard text, trying pyperclip first and ``pbpaste`` second.

    Returns:
        The clipboard contents as a string.

    Raises:
        SystemExit: With exit code 1 when neither method can read the clipboard.
    """
    try:
        return pyperclip.paste()
    except Exception:
        # Deliberate best-effort: any pyperclip failure falls through to the
        # pbpaste fallback. Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        pass

    try:
        result = subprocess.run(
            ["pbpaste"], capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError when pbpaste is not installed.
        print("β Could not access clipboard")
        sys.exit(1)
    return result.stdout
def set_clipboard_content(text: str) -> bool:
    """Copy *text* to the clipboard, trying pyperclip first and ``pbcopy`` second.

    Args:
        text: The text to place on the clipboard.

    Returns:
        True when either method succeeds, False when both fail.
    """
    try:
        pyperclip.copy(text)
        return True
    except Exception:
        # Deliberate best-effort: any pyperclip failure falls through to the
        # pbcopy fallback. Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        pass

    try:
        subprocess.run(["pbcopy"], input=text, text=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError when pbcopy is not installed, so a
        # missing binary is reported as failure instead of raising.
        return False
    return True
def play_sound() -> None:
    """Play the notification sound through ``afplay``; failures are ignored."""
    command = ["afplay", SOUND_FILE]
    try:
        subprocess.run(command, capture_output=True, check=True)
    except FileNotFoundError:
        # afplay is not available on this machine
        return
    except subprocess.CalledProcessError:
        # afplay ran but exited non-zero (for example, the sound file is missing)
        return
# Compiled once at import time instead of on every is_valid_url() call.
_URL_PATTERN = re.compile(
    r"^https?://"  # http:// or https://
    # Domain name. TLD labels may be up to 63 characters long, so long TLDs
    # such as ".technology" or ".photography" are accepted.
    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|"
    r"localhost|"  # localhost...
    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
    r"(?::\d+)?"  # optional port
    r"(?:/?|[/?]\S+)$",
    re.IGNORECASE,
)


def is_valid_url(url: str) -> bool:
    """Check whether a string looks like an http(s) URL.

    Leading and trailing whitespace is ignored. The host must be a dotted
    domain name, ``localhost`` or a dotted-quad IP, optionally followed by a
    port and a whitespace-free path or query.

    Args:
        url: Candidate string.

    Returns:
        True if the stripped string matches the URL pattern, False otherwise.
    """
    url = url.strip()
    if not url:
        return False
    return _URL_PATTERN.match(url) is not None
def extract_urls_from_text(text: str) -> List[str]:
    """Return the lines of *text* that are valid URLs, in their original order.

    Each line is stripped of surrounding whitespace; empty lines and lines
    rejected by ``is_valid_url`` are dropped.
    """
    candidates = (raw.strip() for raw in text.strip().split("\n"))
    return [
        candidate
        for candidate in candidates
        if candidate and is_valid_url(candidate)
    ]
async def fetch_url_content(
    client: httpx.AsyncClient, url: str
) -> Tuple[str, int, Dict]:
    """Fetch content for a single URL through the Jina AI reader API.

    This coroutine does not raise for request or parsing failures; they are
    reported through the returned tuple instead.

    Args:
        client: Shared async HTTP client used to send the POST request.
        url: The page URL to be read.

    Returns:
        A ``(url, status_code, content)`` tuple. On success ``status_code`` is
        200 and ``content`` is the ``data`` object of the JSON response (an
        empty dict when it is missing or not an object). On failure
        ``content`` is ``{"error": message}`` and ``status_code`` is the HTTP
        status, 408 for a timeout, or 500 for any other request error.
    """
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {JINA_API_KEY}",
        "Content-Type": "application/json",
        "X-Retain-Images": "none",
        "X-Return-Format": "markdown",
        "X-With-Links-Summary": "true",
    }
    payload = {"url": url}
    try:
        response = await client.post(
            JINA_API_URL, headers=headers, json=payload, timeout=30.0
        )
        if response.status_code == 200:
            data = response.json()
            content = data.get("data") if isinstance(data, dict) else None
            # A null or non-object "data" is treated like a missing one so the
            # formatter always receives a dict.
            if not isinstance(content, dict):
                content = {}
            return url, 200, content

        try:
            error_data = response.json()
            error_msg = error_data.get("error", {}).get(
                "message", f"HTTP {response.status_code}"
            )
        except Exception:
            # Body is not JSON or not shaped as expected: fall back to raw text.
            # Exception (not a bare except) keeps task cancellation and
            # KeyboardInterrupt propagating.
            error_msg = f"HTTP {response.status_code} - {response.text[:100]}"
        return url, response.status_code, {"error": error_msg}
    except httpx.TimeoutException:
        return url, 408, {"error": "Request timeout (30s)"}
    except httpx.RequestError as e:
        return url, 500, {"error": f"Request failed: {str(e)}"}
    except Exception as e:
        return url, 500, {"error": f"Unexpected error: {str(e)}"}
def format_result(url: str, status_code: int, content: Dict) -> str:
    """Format the result for one URL as a markdown section.

    Args:
        url: The URL that was fetched.
        status_code: Status reported for the fetch.
        content: On success, a dict that may contain ``title``,
            ``description``, ``content`` and ``links`` (a mapping of link text
            to URL). On failure, a dict with an ``error`` message. Null or
            missing values are treated as absent.

    Returns:
        A header naming the URL, then either the formatted page content or a
        failure notice, terminated by a ``---`` separator line.
    """
    content = content or {}
    parts = [
        "\n========================================\n",
        f"# URL: {url}\n",
        "========================================\n\n",
    ]

    if status_code == 200 and "error" not in content:
        # "or" also maps an explicit null to an empty value, so a null
        # content next to a real description no longer raises.
        title = content.get("title") or ""
        description = content.get("description") or ""
        text_content = content.get("content") or ""

        if title:
            parts.append(f"## {title}\n\n")
        # Skip the description when it just repeats the first content line.
        if description and description != text_content.split("\n")[0]:
            parts.append(f"{description}\n\n")

        if text_content:
            # Strip every line and collapse runs of blank lines into one.
            cleaned_lines = []
            for raw_line in text_content.split("\n"):
                line = raw_line.strip()
                if line or not cleaned_lines or cleaned_lines[-1]:
                    cleaned_lines.append(line)
            parts.append("\n".join(cleaned_lines))

        # Add links summary if available
        links = content.get("links") or {}
        if links:
            parts.append("\n\n**Links:**\n")
            parts.extend(
                f"- [{text}]({link_url})\n" for text, link_url in links.items()
            )
    else:
        error_msg = content.get("error", "Unknown error")
        parts.append(f"**Failed URL - Status Code: {status_code}**\n\n")
        parts.append(f"Error: {error_msg}\n")

    parts.append("\n---")
    return "".join(parts)
async def main():
    """Read URLs from the clipboard, fetch them in parallel and copy the result.

    Steps: check that the API key is set, read the clipboard, extract the
    valid URLs, fetch all of them concurrently, format each result in the
    original URL order, copy the combined text to the clipboard and play the
    notification sound.

    Raises:
        SystemExit: With exit code 1 on any failure (missing key, empty
            clipboard, no URLs, clipboard write failure, unexpected error).
    """
    try:
        # Check for API key
        if not JINA_API_KEY:
            print("β JINA_API_KEY not set")
            print("π‘ Add to .env file: JINA_API_KEY=your_key_here")
            sys.exit(1)

        # Get clipboard content
        clipboard_text = get_clipboard_content()
        if not clipboard_text.strip():
            print("β Clipboard is empty")
            sys.exit(1)

        # Extract URLs from clipboard
        urls = extract_urls_from_text(clipboard_text)
        if not urls:
            print("β No valid URLs found in clipboard")
            print(
                "π‘ Please ensure URLs are separated by new lines and start with http:// or https://"
            )
            sys.exit(1)

        print(f"π Processing {len(urls)} URL(s)...")

        # Process URLs in parallel
        async with httpx.AsyncClient() as client:
            tasks = [fetch_url_content(client, url) for url in urls]
            # gather() returns results in the order of the awaitables passed
            # in, so they already follow the clipboard order and each
            # occurrence of a duplicated URL keeps its own result.
            results = await asyncio.gather(*tasks)

        # Format all results
        final_output = "".join(
            format_result(url, status_code, content)
            for url, status_code, content in results
        )

        # Remove trailing newline and dashes
        final_output = final_output.rstrip()
        if final_output.endswith("---"):
            final_output = final_output[:-3].rstrip()

        # Copy to clipboard
        if set_clipboard_content(final_output):
            print(f"β Successfully processed {len(urls)} URL(s)")
            print("π Formatted content copied to clipboard")
        else:
            print("β Failed to copy to clipboard")
            # Print the result so the work is not lost when the copy fails.
            print(final_output)
            sys.exit(1)

        # Play success sound
        play_sound()
    except ImportError as e:
        # NOTE(review): the module-level imports fail before main() runs, so
        # this only triggers for imports made lazily by a dependency.
        print(f"β Missing dependency: {e}")
        print("π‘ Install required packages with:")
        print(" uv pip install --system httpx pyperclip")
        print(" Or: uv pip install --python=$(which python3) httpx pyperclip")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report, dump the traceback, exit non-zero.
        # SystemExit from the sys.exit() calls above is not caught here.
        print(f"β Unexpected error: {e}")
        print("π§ Debug info:")
        traceback.print_exc()
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: drive the async workflow to completion
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment