# Install the following Python modules:
#   pip3 install librosa SpeechRecognition
# Install the ffmpeg CLI app.
import math
import os
import subprocess

import librosa  # pip3 install librosa
import speech_recognition as sr  # pip3 install SpeechRecognition
# Requires the `ffmpeg` command-line tool to be installed and on PATH.
# Goes over every single video in the input directory, converts it to WAV audio, then generates subtitles from that audio.
# Working directories, relative to the current working directory.
inputDir = 'inputs'  # input videos
subDir = 'subs'  # output subtitle (.txt) directory
outputDir = 'out'  # output audio (.wav) directory
def makeDir(name: str) -> None:
    """Create the directory ``name`` if nothing exists at that path yet.

    Missing parent directories are created as well. If the path already
    exists (as a directory or anything else) the call is a silent no-op.

    Args:
        name: Path of the directory to create.
    """
    if os.path.exists(name):
        return
    # exist_ok=True tolerates another process creating the directory between
    # the check above and this call.
    os.makedirs(name, exist_ok=True)
# Make sure every working directory exists before any file is processed.
makeDir(inputDir)
makeDir(subDir)
makeDir(outputDir)
def _convertToWav(inputFile: str, outputFile: str) -> bool:
    """Convert ``inputFile`` to ``outputFile`` with the ffmpeg CLI.

    Args:
        inputFile: Path of the source media file.
        outputFile: Path of the audio file ffmpeg should write.

    Returns:
        True if ffmpeg ran and exited with status 0, False otherwise.
    """
    try:
        # An argument list (no shell) passes file names containing spaces,
        # quotes or shell metacharacters to ffmpeg verbatim.
        result = subprocess.run(
            ['ffmpeg', '-i', inputFile, outputFile],
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        # The ffmpeg executable itself could not be found.
        return False
    if result.returncode != 0:
        # Remove a partially written file so the next run retries the
        # conversion instead of treating the audio as already generated.
        if os.path.exists(outputFile):
            os.remove(outputFile)
        return False
    return True


def _transcribeToFile(wavFile: str, subFile: str, filename: str,
                      durationPerCycle: int = 120,
                      minTailSeconds: int = 5) -> None:
    """Transcribe ``wavFile`` chunk by chunk, appending the text to ``subFile``.

    Args:
        wavFile: Path of the audio file to transcribe.
        subFile: Path of the subtitle text file that chunks are appended to.
        filename: Display name used in progress messages.
        durationPerCycle: Length in seconds of each chunk sent for recognition.
        minTailSeconds: A final chunk shorter than this many seconds is
            skipped instead of being transcribed.
    """
    r = sr.Recognizer()
    totalDuration = math.floor(librosa.get_duration(path=wavFile))
    offset = 0
    # Loop until the entire audio has been covered.
    while offset < totalDuration:
        # `offset` is the chunk start and `duration` the chunk end, both in
        # seconds from the beginning of the audio; the last chunk is clamped
        # to the end of the audio.
        duration = min(offset + durationPerCycle, totalDuration)
        # If only a few seconds are left, skip them. Edit minTailSeconds if
        # such short chunks fail for you.
        if (totalDuration - offset) < minTailSeconds:
            print(f'\t\t* [Fail] Unable to transcribe from {offset / 60} to {duration / 60} minutes')
            break
        print(f'\t\t* Transcribing from {offset / 60} to {duration / 60} out of {totalDuration / 60} minutes.')
        with sr.AudioFile(wavFile) as source:
            # record() takes the *length* to read, not the end position.
            audio = r.record(source, offset=offset, duration=duration - offset)
        try:
            transcribe = r.recognize_google(audio)
        except sr.UnknownValueError:
            # No intelligible speech in this chunk; carry on with the next one.
            print(f'\t\t* [Fail] Unable to transcribe from {offset / 60} to {duration / 60} minutes')
        except sr.RequestError as error:
            # The recognition service could not be reached or rejected the
            # request; report it and keep going rather than abort the batch.
            print(f'\t\t* [Fail] Recognition request failed from {offset / 60} to {duration / 60} minutes: {error}')
        else:
            # Append to subtitle file
            with open(subFile, 'a', encoding='utf-8') as subOutFile:
                subOutFile.write('\n' + transcribe)
            print(f'\t\t* [Done] Subtitle generated from {offset / 60} to {duration / 60} for {filename}.')
        offset = duration


for file in os.listdir(inputDir):
    inputFile = os.path.join(inputDir, file)
    # Only regular files are processed; sub-directories are ignored.
    if not os.path.isfile(inputFile):
        continue

    # splitext removes only the final extension, so "my.talk.mp4" becomes
    # "my.talk" rather than being truncated at the first dot.
    filename = os.path.splitext(file)[0]
    print(f'- Processing "{filename}":')

    outputFile = f'{outputDir}/{filename}.wav'
    # Generate the audio if it doesn't exist yet.
    if not os.path.exists(outputFile):
        print(f'\t* [Process] Converting {filename} to wav...')
        if not _convertToWav(inputFile, outputFile):
            print(f'\t* [Fail] Unable to convert {filename} to wav.\n')
            continue
        print(f'\t* [Done] {filename} converted to wav.')
    else:
        print(f'\t* [Skip] {filename} Audio file already exist.')

    subFile = f'{subDir}/{filename}.txt'
    if not os.path.exists(subFile):
        print(f'\t* [Process] Generating subtitle for {filename}...')
        # Generate the subtitle in chunks of 120 seconds.
        _transcribeToFile(outputFile, subFile, filename, durationPerCycle=120)
        print(f'\t* [Complete] {filename} is processed!\n')
    else:
        print(f'\t* [Skip] Subtitle for {filename} is already exist.\n')