Created
February 7, 2026 01:19
-
-
Save peterc/a6fd1ea08cb2b45aa35e09279af72916 to your computer and use it in GitHub Desktop.
Transcribe MP3 files using Mistral's Voxtral API with speaker diarization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import json | |
| import os | |
| import sys | |
| from mistralai import Mistral | |
def format_time(seconds):
    """Return *seconds* formatted as a zero-padded ``MM:SS`` string.

    The value is truncated to a whole number of seconds with ``int()``.
    Minutes are not wrapped into hours, so 3600 seconds renders as
    ``"60:00"``.
    """
    whole_minutes, remainder = divmod(int(seconds), 60)
    return f"{whole_minutes:02d}:{remainder:02d}"
def transcribe(audio_file):
    """Transcribe *audio_file* through the Mistral client and return the result.

    The API key is read from the ``MISTRAL_API_KEY`` environment variable
    (a ``KeyError`` is raised if it is not set). The file is uploaded in
    binary mode under its base name, with diarization enabled and
    segment-level timestamps requested.

    Returns:
        The response serialised with its ``model_dump_json()`` method and
        parsed back with ``json.loads``.
    """
    api_key = os.environ["MISTRAL_API_KEY"]
    client = Mistral(api_key=api_key)

    # The handle must stay open for the duration of the request, because the
    # client is given the file object itself rather than its bytes.
    with open(audio_file, "rb") as audio:
        upload = {
            "file_name": os.path.basename(audio_file),
            "content": audio,
        }
        response = client.audio.transcriptions.complete(
            model="voxtral-mini-latest",
            file=upload,
            diarize=True,
            timestamp_granularities=["segment"],
        )

    serialized = response.model_dump_json()
    return json.loads(serialized)
def format_transcript(data):
    """Render transcription segments as one text line per speaker turn.

    Consecutive segments that share a ``speaker_id`` are merged into a single
    turn running from the first segment's ``start`` to the last segment's
    ``end``. Each turn becomes a line of the form
    ``"<speaker_id> MM:SS-MM:SS: <text>"``.

    Args:
        data: Mapping with a ``"segments"`` entry. Each segment is a mapping
            with ``"speaker_id"``, ``"start"``, ``"end"`` and ``"text"``.
            A missing or ``None`` ``"segments"`` entry is treated as an empty
            list, and a ``None`` segment text is treated as empty.

    Returns:
        The turn lines joined with newlines and terminated by a newline.
        With no segments this is a single ``"\n"``.
    """
    # The response may carry no segment list at all; treat that as "nothing
    # to print" rather than failing on the lookup or the iteration.
    segments = data.get("segments") or []

    turns = []
    for seg in segments:
        text = (seg.get("text") or "").strip()
        if turns and seg["speaker_id"] == turns[-1]["speaker_id"]:
            current = turns[-1]
            current["end"] = seg["end"]
        else:
            current = {
                "speaker_id": seg["speaker_id"],
                "start": seg["start"],
                "end": seg["end"],
                "parts": [],
            }
            turns.append(current)
        # Collect only non-empty pieces so that joining never yields leading,
        # trailing or doubled spaces when a segment has no text.
        if text:
            current["parts"].append(text)

    lines = []
    for turn in turns:
        start = format_time(turn["start"])
        end = format_time(turn["end"])
        text = " ".join(turn["parts"])
        lines.append(f"{turn['speaker_id']} {start}-{end}: {text}")
    return "\n".join(lines) + "\n"
def main():
    """Command-line entry point: transcribe an audio file and emit the text.

    Usage: ``<script> <audio_file> [output.txt]``.

    The first argument is the audio file to transcribe. If a second argument
    is given, the formatted transcript is written to that path as UTF-8;
    otherwise it is printed to standard output. Progress and error messages
    go to standard error.

    Exits with status 1 when the audio file argument is missing, the file
    does not exist, or the ``MISTRAL_API_KEY`` environment variable is unset
    or empty.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <audio_file> [output.txt]", file=sys.stderr)
        sys.exit(1)

    audio_file = sys.argv[1]
    if not os.path.isfile(audio_file):
        print(f"Error: file not found: {audio_file}", file=sys.stderr)
        sys.exit(1)

    # transcribe() reads this variable; check it up front so the user gets a
    # readable message instead of a KeyError traceback.
    if not os.environ.get("MISTRAL_API_KEY"):
        print("Error: MISTRAL_API_KEY environment variable is not set", file=sys.stderr)
        sys.exit(1)

    print(f"Transcribing {audio_file}...", file=sys.stderr)
    data = transcribe(audio_file)
    output = format_transcript(data)

    if len(sys.argv) >= 3:
        # Explicit UTF-8: transcripts can contain non-ASCII text, and the
        # platform default encoding may not be able to represent it.
        with open(sys.argv[2], "w", encoding="utf-8") as f:
            f.write(output)
        print(f"Written to {sys.argv[2]}", file=sys.stderr)
    else:
        print(output, end="")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment