Skip to content

Instantly share code, notes, and snippets.

@peterc
Created February 7, 2026 01:19
Show Gist options
  • Select an option

  • Save peterc/a6fd1ea08cb2b45aa35e09279af72916 to your computer and use it in GitHub Desktop.

Select an option

Save peterc/a6fd1ea08cb2b45aa35e09279af72916 to your computer and use it in GitHub Desktop.
Transcribe MP3 files using Mistral's Voxtral API with speaker diarization
#!/usr/bin/env python3
import json
import os
import sys
from mistralai import Mistral
def format_time(seconds):
    """Render a duration given in seconds as a zero-padded ``MM:SS`` string.

    The value is truncated to whole seconds with ``int()`` before being
    split, so any fractional part is dropped rather than rounded. Minutes are
    not wrapped into hours: 3600 seconds renders as ``60:00``.
    """
    whole_minutes, leftover_seconds = divmod(int(seconds), 60)
    return f"{whole_minutes:02d}:{leftover_seconds:02d}"
def transcribe(audio_file):
    """Send *audio_file* to Mistral's transcription endpoint and return the result.

    The request uses the ``voxtral-mini-latest`` model with diarization
    enabled and segment-level timestamps. The API key is read from the
    ``MISTRAL_API_KEY`` environment variable, so a ``KeyError`` is raised
    when that variable is unset.

    Returns the response as plain Python data, obtained by parsing the
    response model's JSON dump.
    """
    api_client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

    with open(audio_file, "rb") as audio_stream:
        upload = {
            "file_name": os.path.basename(audio_file),
            "content": audio_stream,
        }
        # The stream must still be open while the request is being sent.
        result = api_client.audio.transcriptions.complete(
            model="voxtral-mini-latest",
            file=upload,
            diarize=True,
            timestamp_granularities=["segment"],
        )

    raw_json = result.model_dump_json()
    return json.loads(raw_json)
def format_transcript(data):
    """Render diarized transcription data as one text line per speaker turn.

    Consecutive segments that share a ``speaker_id`` are merged into a single
    turn running from the first segment's start to the last segment's end.
    Each turn is rendered as ``<speaker_id> MM:SS-MM:SS: <text>``.

    Args:
        data: Mapping with a ``"segments"`` entry; each segment is a mapping
            with ``"speaker_id"``, ``"start"``, ``"end"`` and ``"text"``
            keys. A missing, ``None`` or empty ``"segments"`` entry is
            treated as an empty transcript.

    Returns:
        The transcript with every line newline-terminated, or ``""`` when
        there are no segments.
    """
    # Tolerate an absent or null segment list instead of crashing.
    segments = data.get("segments") or []

    turns = []
    for seg in segments:
        text = seg["text"].strip()
        if turns and seg["speaker_id"] == turns[-1]["speaker_id"]:
            current = turns[-1]
            current["end"] = seg["end"]
            # Skip empty pieces so merging never introduces stray spaces.
            if text:
                current["parts"].append(text)
        else:
            turns.append({
                "speaker_id": seg["speaker_id"],
                "start": seg["start"],
                "end": seg["end"],
                "parts": [text] if text else [],
            })

    lines = []
    for turn in turns:
        start = format_time(turn["start"])
        end = format_time(turn["end"])
        text = " ".join(turn["parts"])
        lines.append(f"{turn['speaker_id']} {start}-{end}: {text}\n")

    # Terminating each line individually yields "" (not a lone newline) for
    # an empty transcript.
    return "".join(lines)
def main():
    """Command-line entry point: transcribe an audio file and emit the text.

    Usage: ``<script> <audio_file> [output.txt]``. The formatted transcript
    is written to ``output.txt`` when given, otherwise to standard output.
    Progress and error messages go to standard error.

    Exits with status 1 when the audio file argument is missing, the file
    does not exist, or the ``MISTRAL_API_KEY`` environment variable is unset.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <audio_file> [output.txt]", file=sys.stderr)
        sys.exit(1)

    audio_file = sys.argv[1]
    if not os.path.isfile(audio_file):
        print(f"Error: file not found: {audio_file}", file=sys.stderr)
        sys.exit(1)

    # Fail with a readable message instead of a KeyError traceback from
    # deep inside the transcription call.
    if not os.environ.get("MISTRAL_API_KEY"):
        print("Error: MISTRAL_API_KEY environment variable is not set", file=sys.stderr)
        sys.exit(1)

    print(f"Transcribing {audio_file}...", file=sys.stderr)
    data = transcribe(audio_file)
    output = format_transcript(data)

    if len(sys.argv) >= 3:
        output_path = sys.argv[2]
        # Transcripts routinely contain non-ASCII text; do not depend on the
        # platform's default encoding.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(output)
        print(f"Written to {output_path}", file=sys.stderr)
    else:
        print(output, end="")
# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment