Skip to content

Instantly share code, notes, and snippets.

@willbarrett
Created September 10, 2024 20:43
Show Gist options
  • Select an option

  • Save willbarrett/efba64178aebb7631d8a7f4749cdff70 to your computer and use it in GitHub Desktop.

Select an option

Save willbarrett/efba64178aebb7631d8a7f4749cdff70 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import argparse
from datetime import datetime as dt
import os
import re
import shutil
import subprocess
# Folder on the voice-note device that holds the raw recordings.
DEVICE_PATH = "/Volumes/NO NAME/RECORDER/FOLDER_A/"
# Folder that receives one "YYYY-MM-DD" sub-folder per recording day.
DESTINATION_PATH = "/Users/will/Library/CloudStorage/GoogleDrive-will@barrettventures.co/My Drive/Voice Notes/"
# Recordings are recognised by an upper-case ".MP3" extension.
MP3_MATCH_REGEX = r".*\.MP3$"
# Settings passed to the whisper command line for every recording.
WHISPER_MODEL = "small.en"
WHISPER_LANGUAGE = "en"
WHISPER_THREADS = "7"


def build_parser():
    """Return the command-line parser for the script."""
    parser = argparse.ArgumentParser(
        prog="DailyTranscriber",
        description="Copies and transcribes off of a voice note device",
        epilog="No transcriber transcribes harder.",
    )
    parser.add_argument(
        '-s', '--skip', choices=['copy'],
        help="phase to skip; 'copy' goes straight to transcription",
    )
    return parser


def list_recordings(folder):
    """Return the sorted names of the MP3 recordings directly inside *folder*."""
    return sorted(
        name for name in os.listdir(folder) if re.match(MP3_MATCH_REGEX, name)
    )


def copy_recordings(device_path=DEVICE_PATH, destination_path=DESTINATION_PATH):
    """Copy recordings that are not yet present into per-day folders.

    Each recording is copied to ``<destination_path>/<YYYY-MM-DD>/<name>``,
    where the date is the file's modification time on the device (local
    time). A recording whose destination already exists is skipped.

    Returns:
        The destination paths of the recordings copied by this call.

    Raises:
        OSError: if *device_path* cannot be listed or a copy fails.
    """
    copied = []
    for recording in list_recordings(device_path):
        source = os.path.join(device_path, recording)
        date = dt.fromtimestamp(os.path.getmtime(source)).strftime("%Y-%m-%d")
        target_folder = os.path.join(destination_path, date)
        destination = os.path.join(target_folder, recording)
        if os.path.exists(destination):
            continue
        # makedirs also creates a missing destination root on first use.
        os.makedirs(target_folder, exist_ok=True)
        print("Copying recording " + recording)
        shutil.copyfile(source, destination)
        copied.append(destination)
    return copied


def find_untranscribed(folder):
    """Return the paths of MP3 files in *folder* that lack a transcript.

    A recording counts as transcribed when a file with the same stem and a
    ``.txt`` extension sits next to it. Only the extension is swapped, so
    names that contain "MP3" elsewhere are handled correctly.
    """
    pending = []
    for name in list_recordings(folder):
        transcript = os.path.splitext(name)[0] + ".txt"
        if not os.path.exists(os.path.join(folder, transcript)):
            pending.append(os.path.join(folder, name))
    return pending


def transcribe(recording, output_dir):
    """Run the whisper CLI on *recording*, writing its output to *output_dir*.

    Returns:
        True when whisper exits with status 0, False otherwise.

    Raises:
        FileNotFoundError: if the ``whisper`` executable is not on PATH.
    """
    whisper_command_segments = [
        "whisper",
        recording,
        "--model",
        WHISPER_MODEL,
        "--output_dir",
        output_dir,
        "--output_format",
        "txt",
        "--language",
        WHISPER_LANGUAGE,
        "--threads",
        WHISPER_THREADS,
    ]
    result = subprocess.run(whisper_command_segments)
    return result.returncode == 0


def transcribe_pending(destination_path=DESTINATION_PATH):
    """Transcribe every recording under *destination_path* that has no transcript.

    Every sub-folder found by walking *destination_path* is checked; the
    folders are visited in sorted order within each parent.
    """
    for root, dirs, _files in os.walk(destination_path):
        for directory in sorted(dirs):
            print("Considering directory " + directory)
            # os.walk yields bare names; join with root to get a usable path.
            folder = os.path.join(root, directory)
            needs_transcription = find_untranscribed(folder)
            if not needs_transcription:
                print("Nothing to do, moving on...")
                continue
            print("Transcribing " + str(len(needs_transcription)) + " recordings")
            for file in needs_transcription:
                if not transcribe(file, folder):
                    print("Transcription failed for " + file)


def main(argv=None):
    """Copy new recordings off the device, then transcribe what is missing.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    if args.skip != 'copy':
        if not os.path.isdir(DEVICE_PATH):
            raise SystemExit(
                "Recorder not found at " + DEVICE_PATH
                + " - connect it or pass --skip copy"
            )
        copy_recordings()
    # NOTE(review): assumed to print even when the copy phase is skipped.
    print("COPYING DONE - STARTING TRANSCRIPTION")
    transcribe_pending()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment