Skip to content

Instantly share code, notes, and snippets.

@BHznJNs
Created July 22, 2025 15:57
Show Gist options
  • Select an option

  • Save BHznJNs/eb12f09f668107939e7d28c63bf4c6fa to your computer and use it in GitHub Desktop.

Select an option

Save BHznJNs/eb12f09f668107939e7d28c63bf4c6fa to your computer and use it in GitHub Desktop.
An audio preprocessor module for applications that use Whisper or Faster-Whisper for speech-to-text (STT).
import numpy as np
import noisereduce as nr
from loguru import logger
from pydub import AudioSegment
from pydub.silence import split_on_silence
def load_audio_file(audio_path: str) -> AudioSegment | None:
    """Load an audio file and convert it to a single channel.

    Args:
        audio_path: Path handed to ``AudioSegment.from_file``.

    Returns:
        The decoded audio converted to one channel, or ``None`` when decoding
        or the channel conversion raised (the error is logged, not re-raised).
    """
    try:
        decoded = AudioSegment.from_file(audio_path)
        mono = decoded.set_channels(1)
    except Exception as e:
        # Best-effort loader: callers check for None instead of handling
        # decoder-specific exceptions.
        logger.error(f"Failed to load the audio file: {e}")
        return None
    return mono
def reduce_noise(audio: AudioSegment) -> AudioSegment:
    """Return a noise-reduced, 16-bit mono copy of *audio*.

    The samples are passed through ``noisereduce`` with ``prop_decrease=0.8``
    and re-wrapped in a new ``AudioSegment`` at the original frame rate.

    Args:
        audio: Segment to denoise. Any channel count / sample width is
            accepted; it is converted to 16-bit mono before processing.

    Returns:
        A new 16-bit, single-channel ``AudioSegment`` with the same frame rate.
    """
    # The output below is always declared as 16-bit mono, so normalise the
    # input to that layout first. Otherwise interleaved stereo samples or
    # 8/24/32-bit sample values would be reinterpreted incorrectly.
    pcm16 = audio.set_channels(1).set_sample_width(2)
    samplerate = pcm16.frame_rate

    # Resolve the audio data into the float array format noisereduce handles.
    samples = np.array(pcm16.get_array_of_samples()).astype(np.float32)
    if samples.size == 0:
        # Nothing to denoise; avoid handing an empty array to noisereduce
        # (NOTE(review): its behaviour on empty input is not guaranteed).
        return pcm16

    reduced_noise_samples = nr.reduce_noise(
        y=samples, sr=samplerate, prop_decrease=0.8
    )

    # Round and clamp before the integer cast: float values outside the int16
    # range would otherwise wrap / produce undefined results in astype().
    int16_range = np.iinfo(np.int16)
    pcm_samples = np.clip(
        np.rint(reduced_noise_samples), int16_range.min, int16_range.max
    ).astype(np.int16)

    return AudioSegment(
        pcm_samples.tobytes(),
        frame_rate=samplerate,
        sample_width=2,  # 16-bit audio
        channels=1,
    )
def calculate_silence_threshold(audio: AudioSegment) -> float:
    """Derive a finite silence threshold (in dBFS) from the audio's loudness.

    The candidate threshold is 8 dB below ``audio.dBFS``; the result is the
    lower of that candidate and -50.0, so it is never higher than -50 dBFS.

    Args:
        audio: Segment whose ``dBFS`` attribute is read.

    Returns:
        The threshold in dBFS. Always a finite float, so callers may safely
        convert it with ``int()``.
    """
    safe_floor_thresh = -50.0
    loudness = audio.dBFS

    # pydub reports -inf for digitally silent audio. Returning -inf would make
    # the caller's int() conversion raise OverflowError, so fall back to the
    # fixed threshold instead.
    if loudness == float("-inf"):
        return safe_floor_thresh

    candidate_thresh = loudness - 8
    return min(candidate_thresh, safe_floor_thresh)
def audio_preprocess(audio_path: str) -> bool:
    """Denoise an audio file, strip its silent stretches and save it in place.

    Pipeline: load the file as mono, run noise reduction, split on silence
    (minimum 500 ms of silence, 150 ms kept around each chunk), join the
    non-silent chunks and export the result.

    Note:
        On success the file at ``audio_path`` is overwritten with WAV data,
        whatever its original format or extension was.

    Args:
        audio_path: Path of the audio file to process and overwrite.

    Returns:
        ``True`` when the processed audio was written; ``False`` when loading
        failed, no voiced segment was detected, or the export failed. In the
        ``False`` cases the original file is not rewritten by this function.
    """
    audio = load_audio_file(audio_path)
    if audio is None:
        return False

    processed_audio = reduce_noise(audio)
    silence_thresh = calculate_silence_threshold(processed_audio)

    try:
        thresh_db = int(silence_thresh)
    except (OverflowError, ValueError):
        # A non-finite threshold (e.g. -inf for completely silent audio)
        # cannot be converted; treat it as "no voice found" instead of
        # crashing.
        chunks = []
    else:
        chunks = split_on_silence(
            processed_audio,
            min_silence_len=500,
            silence_thresh=thresh_db,
            keep_silence=150,
        )

    if not chunks:
        logger.warning("Failed detect the voice segment. will use the original audio file.")
        return False

    # Merge the split audio chunks back into one continuous segment.
    final_audio = sum(chunks, AudioSegment.empty())

    try:
        final_audio.export(audio_path, format="wav")
    except Exception as e:
        logger.error(f"Failed to export the processed audio data: {e}")
        return False
    return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment