willbarrett · September 10, 2024 20:43
diff --git a/gistfile1.txt b/gistfile1.txt
 #!/usr/bin/python3

 import argparse
 from datetime import datetime  as dt
 import os
 import re
 import shutil
 import subprocess

 # Folder on the mounted voice recorder that is scanned for recordings.
 DEVICE_PATH = "/Volumes/NO NAME/RECORDER/FOLDER_A/"
 # Root folder that receives the copied recordings, grouped into dated sub-folders.
 DESTINATION_PATH = "/Users/will/Library/CloudStorage/GoogleDrive-will@barrettventures.co/My Drive/Voice Notes/"

 # Matches file names that end in an upper-case ".MP3" extension.
 MP3_MATCH_REGEX = r".*\.MP3$"

 # Command-line interface; the single option allows a stage to be skipped.
 parser = argparse.ArgumentParser(
    prog="DailyTranscriber",
    description="Copies and transcribes off of a voice note device",
    epilog="No transcriber transcribes harder.",
 )

 parser.add_argument(
    '-s', '--skip',
    choices=['copy'],
    help="stage to skip; 'copy' bypasses the device copy and goes straight to transcription",
 )

 args = parser.parse_args()

 # Stage 1: copy new recordings off the device unless "--skip copy" was given.
 if args.skip != 'copy':
    # Every MP3 currently present in the recorder folder.
    recordings = [
        name for name in os.listdir(DEVICE_PATH)
        if re.match(MP3_MATCH_REGEX, name)
    ]

    for recording in recordings:
        source = os.path.join(DEVICE_PATH, recording)

        # Recordings are filed under a folder named for their last-modified date.
        modified = os.path.getmtime(source)
        date = dt.fromtimestamp(modified).strftime("%Y-%m-%d")

        target_folder = os.path.join(DESTINATION_PATH, date)
        destination = os.path.join(target_folder, recording)

        # mkdir (not makedirs) on purpose: a missing destination root should
        # still fail loudly instead of being silently created.
        try:
            os.mkdir(target_folder)
        except FileExistsError:
            pass

        # Already copied on an earlier run; leave the existing file alone.
        if os.path.exists(destination):
            continue

        print("Copying recording " + recording)
        shutil.copyfile(source, destination)

 print("COPYING DONE - STARTING TRANSCRIPTION")


 # Stage 2: run whisper on every MP3 under DESTINATION_PATH that has no
 # matching ".txt" transcript next to it.
 for root, dirs, _files in os.walk(DESTINATION_PATH):
    for directory in dirs:
        print("Considering directory " + directory)

        # os.walk yields bare directory names; they must be joined onto root
        # before being listed, otherwise they resolve against the CWD.
        folder = os.path.join(root, directory)

        needs_transcription = []
        for file in os.listdir(folder):
            if not re.match(MP3_MATCH_REGEX, file):
                continue
            # Swap only the extension; str.replace would also rewrite any
            # "MP3" occurring earlier in the file name.
            stem = os.path.splitext(file)[0]
            transcription_path = os.path.join(folder, stem + ".txt")
            if not os.path.exists(transcription_path):
                needs_transcription.append(os.path.join(folder, file))

        if not needs_transcription:
            print("Nothing to do, moving on...")
            continue

        print("Transcribing " + str(len(needs_transcription)) + " recordings")
        for file in needs_transcription:
            whisper_command_segments = [
                "whisper",
                file,
                "--model",
                "small.en",
                "--output_dir",
                folder,
                "--output_format",
                "txt",
                "--language",
                "en",
                "--threads",
                "7",
            ]

            result = subprocess.run(whisper_command_segments)
            # Report failures instead of dropping the exit status; keep going
            # so one bad recording does not block the rest.
            if result.returncode != 0:
                print("whisper exited with status " + str(result.returncode) + " for " + file)
	#!/usr/bin/python3

	import argparse
	from datetime import datetime as dt
	import os
	import re
	import shutil
	import subprocess

	# Folder on the mounted voice recorder that is scanned for recordings.
	DEVICE_PATH = "/Volumes/NO NAME/RECORDER/FOLDER_A/"
	# Root folder that receives the copied recordings, grouped into dated sub-folders.
	DESTINATION_PATH = "/Users/will/Library/CloudStorage/GoogleDrive-will@barrettventures.co/My Drive/Voice Notes/"

	# Matches file names that end in an upper-case ".MP3" extension.
	MP3_MATCH_REGEX = r".*\.MP3$"

	# Command-line interface; the single option allows a stage to be skipped.
	parser = argparse.ArgumentParser(
	    prog="DailyTranscriber",
	    description="Copies and transcribes off of a voice note device",
	    epilog="No transcriber transcribes harder.",
	)

	parser.add_argument(
	    '-s', '--skip',
	    choices=['copy'],
	    help="stage to skip; 'copy' bypasses the device copy and goes straight to transcription",
	)

	args = parser.parse_args()

	# Stage 1: copy new recordings off the device unless "--skip copy" was given.
	if args.skip != 'copy':
	    # Every MP3 currently present in the recorder folder.
	    recordings = [
	        name for name in os.listdir(DEVICE_PATH)
	        if re.match(MP3_MATCH_REGEX, name)
	    ]

	    for recording in recordings:
	        source = os.path.join(DEVICE_PATH, recording)

	        # Recordings are filed under a folder named for their last-modified date.
	        modified = os.path.getmtime(source)
	        date = dt.fromtimestamp(modified).strftime("%Y-%m-%d")

	        target_folder = os.path.join(DESTINATION_PATH, date)
	        destination = os.path.join(target_folder, recording)

	        # mkdir (not makedirs) on purpose: a missing destination root should
	        # still fail loudly instead of being silently created.
	        try:
	            os.mkdir(target_folder)
	        except FileExistsError:
	            pass

	        # Already copied on an earlier run; leave the existing file alone.
	        if os.path.exists(destination):
	            continue

	        print("Copying recording " + recording)
	        shutil.copyfile(source, destination)

	print("COPYING DONE - STARTING TRANSCRIPTION")


	# Stage 2: run whisper on every MP3 under DESTINATION_PATH that has no
	# matching ".txt" transcript next to it.
	for root, dirs, _files in os.walk(DESTINATION_PATH):
	    for directory in dirs:
	        print("Considering directory " + directory)

	        # os.walk yields bare directory names; they must be joined onto root
	        # before being listed, otherwise they resolve against the CWD.
	        folder = os.path.join(root, directory)

	        needs_transcription = []
	        for file in os.listdir(folder):
	            if not re.match(MP3_MATCH_REGEX, file):
	                continue
	            # Swap only the extension; str.replace would also rewrite any
	            # "MP3" occurring earlier in the file name.
	            stem = os.path.splitext(file)[0]
	            transcription_path = os.path.join(folder, stem + ".txt")
	            if not os.path.exists(transcription_path):
	                needs_transcription.append(os.path.join(folder, file))

	        if not needs_transcription:
	            print("Nothing to do, moving on...")
	            continue

	        print("Transcribing " + str(len(needs_transcription)) + " recordings")
	        for file in needs_transcription:
	            whisper_command_segments = [
	                "whisper",
	                file,
	                "--model",
	                "small.en",
	                "--output_dir",
	                folder,
	                "--output_format",
	                "txt",
	                "--language",
	                "en",
	                "--threads",
	                "7",
	            ]

	            result = subprocess.run(whisper_command_segments)
	            # Report failures instead of dropping the exit status; keep going
	            # so one bad recording does not block the rest.
	            if result.returncode != 0:
	                print("whisper exited with status " + str(result.returncode) + " for " + file)
No results found