Created
September 10, 2024 20:43
-
-
Save willbarrett/efba64178aebb7631d8a7f4749cdff70 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
"""Copy voice recordings off a recorder and transcribe them with Whisper.

The script works in two stages:

1. Copy every ``*.MP3`` file found in ``DEVICE_PATH`` into a per-day folder
   (named ``YYYY-MM-DD`` after the file's modification time) underneath
   ``DESTINATION_PATH``. Files already present at the destination are left
   alone. This stage is skipped with ``--skip copy``.
2. Walk ``DESTINATION_PATH`` and run the ``whisper`` command line tool on
   every recording that has no ``.txt`` transcript next to it.
"""
import argparse
from datetime import datetime as dt
import os
import re
import shutil
import subprocess

DEVICE_PATH = "/Volumes/NO NAME/RECORDER/FOLDER_A/"
DESTINATION_PATH = "/Users/will/Library/CloudStorage/GoogleDrive-will@barrettventures.co/My Drive/Voice Notes/"
MP3_MATCH_REGEX = r".*\.MP3$"

parser = argparse.ArgumentParser(
    prog="DailyTranscriber",
    description="Copies and transcribes off of a voice note device",
    epilog="No transcriber transcribes harder.")
parser.add_argument('-s', '--skip', choices=['copy'])


def _transcript_path(recording_path):
    """Return the ``.txt`` path that sits next to *recording_path*.

    Only the file extension is swapped, so an "MP3" occurring elsewhere in
    the file name is left untouched.
    """
    return os.path.splitext(recording_path)[0] + ".txt"


def copy_recordings(device_path=DEVICE_PATH, destination_path=DESTINATION_PATH):
    """Copy new recordings from the device into dated destination folders.

    Args:
        device_path: Directory on the recorder that holds the MP3 files.
        destination_path: Root directory that receives one sub-folder per
            day, named after each recording's modification date.

    Returns:
        The destination paths of the files copied during this call, in
        sorted file-name order. Recordings that already exist at the
        destination are skipped and not listed.

    Raises:
        FileNotFoundError: If *device_path* does not exist (for example
            because the recorder is not mounted).
    """
    copied = []
    for recording in sorted(os.listdir(device_path)):
        if not re.match(MP3_MATCH_REGEX, recording):
            continue
        source = os.path.join(device_path, recording)
        date = dt.fromtimestamp(os.path.getmtime(source)).strftime("%Y-%m-%d")
        target_folder = os.path.join(destination_path, date)
        destination = os.path.join(target_folder, recording)
        # makedirs also creates a missing destination root and does not fail
        # when the day folder already exists.
        os.makedirs(target_folder, exist_ok=True)
        if os.path.exists(destination):
            continue
        print("Copying recording " + recording)
        shutil.copyfile(source, destination)
        copied.append(destination)
    return copied


def find_untranscribed(folder):
    """Return full paths of the MP3 files in *folder* lacking a transcript.

    A recording counts as transcribed when a file with the same base name
    and a ``.txt`` extension exists in the same folder.
    """
    pending = []
    for name in sorted(os.listdir(folder)):
        if not re.match(MP3_MATCH_REGEX, name):
            continue
        recording_path = os.path.join(folder, name)
        if not os.path.exists(_transcript_path(recording_path)):
            pending.append(recording_path)
    return pending


def transcribe_recordings(destination_path=DESTINATION_PATH):
    """Run whisper on every recording below *destination_path* without a transcript.

    Every sub-directory at any depth is considered; files lying directly in
    *destination_path* itself are not.

    Returns:
        The number of recordings handed to whisper.
    """
    attempted = 0
    for root, dirs, _files in os.walk(destination_path):
        for directory in dirs:
            print("Considering directory " + directory)
            # os.walk yields bare directory names; they must be joined with
            # root, otherwise they resolve against the working directory.
            folder = os.path.join(root, directory)
            needs_transcription = find_untranscribed(folder)
            if not needs_transcription:
                print("Nothing to do, moving on...")
                continue
            print("Transcribing " + str(len(needs_transcription)) + " recordings")
            for recording_path in needs_transcription:
                whisper_command_segments = [
                    "whisper",
                    recording_path,
                    "--model",
                    "small.en",
                    "--output_dir",
                    folder,
                    "--output_format",
                    "txt",
                    "--language",
                    "en",
                    "--threads",
                    "7",
                ]
                result = subprocess.run(whisper_command_segments)
                attempted += 1
                if result.returncode != 0:
                    # Keep going with the remaining files, but make the
                    # failure visible instead of silently dropping it.
                    print("Transcription failed for " + recording_path
                          + " (exit code " + str(result.returncode) + ")")
    return attempted


def main(argv=None):
    """Parse the command line and run the copy and transcription stages.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    args = parser.parse_args(argv)
    if args.skip != 'copy':
        copy_recordings()
    print("COPYING DONE - STARTING TRANSCRIPTION")
    transcribe_recordings()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment