Created
February 27, 2025 16:41
-
-
Save conspirator/7df47f68548b4037524bc2df51fd3d2c to your computer and use it in GitHub Desktop.
A simple command-line text-to-speech tool using Kokoro-ONNX. Pipe text to convert it to speech with options to play audio directly or save to file, with configurable voice, speed, and language settings.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "kokoro-onnx", | |
| # "sounddevice", | |
| # "soundfile", | |
| # "requests", | |
| # ] | |
| # /// | |
| """ | |
| Text-to-speech script using kokoro-onnx | |
| Assumes UV is installed. If not, install it with: | |
| curl -LsSf https://astral.sh/uv/install.sh | sh | |
| Usage: | |
| echo "Hello, world." | ./tts # Play audio directly | |
| echo "Hello, world." | ./tts --save # Save to default location | |
| echo "Hello, world." | ./tts --save --path /path/to/file.wav # Save to custom path | |
| cat test.txt | ./tts --speed 1.5 # Play at faster speed | |
| """ | |
| import argparse | |
| import sys | |
| import sounddevice as sd | |
| import soundfile as sf | |
| from kokoro_onnx import Kokoro | |
| from pathlib import Path | |
| import requests | |
| import os | |
| import time | |
| from typing import Tuple | |
# Model assets are cached under ~/.cache/kokoro-tts.
CACHE_DIR = Path.home().joinpath(".cache", "kokoro-tts")
MODEL_PATH = CACHE_DIR.joinpath("kokoro-v1.0.onnx")
VOICES_PATH = CACHE_DIR.joinpath("voices-v1.0.bin")

# Download locations of the two model assets (kokoro-onnx GitHub release).
MODEL_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx"
VOICES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"

# Directory that receives saved audio when no explicit output path is given.
DEFAULT_SAVE_DIR = os.path.expanduser("~/Downloads")
def download_file(url: str, path: Path) -> None:
    """Download ``url`` to ``path`` unless ``path`` already exists.

    The response is streamed into a sibling ``<name>.part`` file and renamed
    onto ``path`` only after the whole body has been written. An interrupted
    or failed download therefore never leaves a truncated file at ``path``
    that a later run would mistake for a complete one.

    Args:
        url: HTTP(S) location of the file to fetch.
        path: Destination on disk; missing parent directories are created.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            error response status.
        OSError: If the destination cannot be written.
    """
    if path.exists():
        return

    print(f"Downloading {path.name}...")
    path.parent.mkdir(parents=True, exist_ok=True)
    partial_path = path.with_name(path.name + ".part")
    try:
        # (connect, read) timeouts: without them a stalled server would hang
        # the script indefinitely.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the file only once it is complete.
        partial_path.replace(path)
    except BaseException:
        # Also covers Ctrl-C: remove the incomplete file, then re-raise.
        partial_path.unlink(missing_ok=True)
        raise
    print(f"Downloaded {path.name}")
def ensure_model_files() -> Tuple[Path, Path]:
    """Fetch the model and voices files if needed and return their paths.

    Returns:
        ``(MODEL_PATH, VOICES_PATH)``, after ``download_file`` has been called
        for each of them (model first, then voices).
    """
    required_assets = (
        (MODEL_URL, MODEL_PATH),
        (VOICES_URL, VOICES_PATH),
    )
    for source_url, destination in required_assets:
        download_file(source_url, destination)
    return MODEL_PATH, VOICES_PATH
def process_text(text, voice="af_sky", speed=1.0, lang="en-us", save=False, path=None):
    """Synthesize ``text`` with Kokoro, then play it or write it to a file.

    Args:
        text: Text to convert to speech.
        voice: Voice name forwarded to ``Kokoro.create``.
        speed: Speed value forwarded to ``Kokoro.create``.
        lang: Language code forwarded to ``Kokoro.create``.
        save: When true, write the audio to a file instead of playing it.
        path: Output file path used when ``save`` is true. When falsy, the
            file is written to ``DEFAULT_SAVE_DIR`` as
            ``audio_<unix-seconds>.wav``.
    """
    model_file, voices_file = ensure_model_files()
    engine = Kokoro(str(model_file), str(voices_file))
    audio, rate = engine.create(text, voice=voice, speed=speed, lang=lang)

    # Playback branch: block until the audio has finished playing.
    if not save:
        print(f"Playing: {text}")
        sd.play(audio, rate)
        sd.wait()
        return

    # Save branch: resolve the destination and make sure its directory exists.
    if path:
        file_path = path
        parent_dir = os.path.dirname(os.path.abspath(file_path))
        os.makedirs(parent_dir, exist_ok=True)
    else:
        timestamp = int(time.time())
        os.makedirs(DEFAULT_SAVE_DIR, exist_ok=True)
        file_path = os.path.join(DEFAULT_SAVE_DIR, f"audio_{timestamp}.wav")

    sf.write(file_path, audio, rate)
    print(f"Saved audio to {file_path}")
def main():
    """Command-line entry point: read text from stdin and speak or save it.

    Parses ``--voice``, ``--speed``, ``--lang``, ``--save`` and ``--path``,
    reads all of stdin, and passes the stripped text to ``process_text``.

    Diagnostics are written to stderr so they do not mix with the normal
    stdout output. The process exits with status 1 on empty input or on any
    error, and with status 130 when interrupted with Ctrl-C.
    """
    parser = argparse.ArgumentParser(description='Text-to-speech with configurable options')
    parser.add_argument('--voice', type=str, default="af_sky",
                        help='Voice to use (default: af_sky)')
    parser.add_argument('--speed', type=float, default=1.0,
                        help='Speech speed multiplier (default: 1.0)')
    parser.add_argument('--lang', type=str, default="en-us",
                        help='Language code (default: en-us)')
    parser.add_argument('--save', action='store_true',
                        help='Save audio to file instead of playing')
    parser.add_argument('--path', type=str,
                        help='Custom path to save the audio file (used with --save)')
    args = parser.parse_args()

    # Read text from stdin
    try:
        text = sys.stdin.read().strip()
    except (OSError, ValueError) as e:
        # ValueError covers undecodable input (UnicodeDecodeError) and a
        # closed stdin.
        print(f"An error occurred: {e}", file=sys.stderr)
        sys.exit(1)

    if not text:
        print("Error: No input provided. Please pipe some text to the script.", file=sys.stderr)
        sys.exit(1)

    try:
        process_text(
            text,
            voice=args.voice,
            speed=args.speed,
            lang=args.lang,
            save=args.save,
            path=args.path,
        )
    except KeyboardInterrupt:
        # Ctrl-C during download, synthesis or playback: exit without a traceback.
        print("Interrupted.", file=sys.stderr)
        sys.exit(130)
    except FileNotFoundError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except requests.RequestException as e:
        print(f"Error downloading model files: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"An error occurred: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment