Source code for drumscript.audio_processor.tempo_detector

# DrumScript/audio_processor/tempo_detector.py
# ------------------------------------------------------------------------------------------------------------
"""
This module contains functions for automatic tempo detection from audio data.
"""
# Import packages: ------------------------------------------------------------------------------------------------

import argparse

import librosa
import numpy as np

from drumscript.notation_generator.constants import SAMPLE_RATE


[docs] def estimate_tempo(audio_path, sr): """ Estimates tempo from the tempogram, but restricted to a plausible range. (Corrected to avoid INF and extreme BPM errors, ie 10500 BPM). Returns a default 120.0 BPM if the audio is too short to analyze. :param audio_path: The input audio time series. :type audio_path: np.ndarray :param sr: Sampling rate of the audio. :type sr: int :return: The estimated tempo in Beats Per Minute (BPM). :rtype: float """ if audio_path.size == 0: return 0.0 # Check if there are enough hits in the audio # Calculating tempo on clips shorter than ~1-2 seconds is unreliable and often produces artifacts (like 235 BPM for a single kick). duration_seconds = audio_path.shape[0] / sr if duration_seconds < 1.0: # duration_seconds less than 1 second, ie anything over 1 sec duration is valid print(f"Audio too short for tempo detection ({duration_seconds:.2f}s). Defaulting to 120 BPM.") return 120.0 # oenv = librosa.onset.onset_strength(y=audio_path, sr=sr, hop_length=256) oenv = librosa.onset.onset_strength(y=audio_path) tempogram = librosa.feature.tempogram(onset_envelope=oenv) tempo_spectrum = np.sum(tempogram, axis=1) tempo_freqs = librosa.tempo_frequencies(tempogram.shape[0]) # --- Fix for extreme BPM error --- # Create a mask to only consider tempos in a plausible musical range (e.g., 60-240 BPM) plausible_tempos_mask = (tempo_freqs >= 60) & (tempo_freqs <= 240) # Find the index of the peak within the plausible range plausible_spectrum = tempo_spectrum[plausible_tempos_mask] if plausible_spectrum.size == 0: return 120.0 # Return default if no energy in plausible range peak_idx_in_plausible_range = np.argmax(plausible_spectrum) # Convert that index back to a BPM value plausible_tempo_freqs = tempo_freqs[plausible_tempos_mask] estimated_bpm = plausible_tempo_freqs[peak_idx_in_plausible_range] return estimated_bpm
# ===================================================================================================== # MAIN BLOCK - for local testing of this function if __name__ == "__main__": # uncomment during testing # from datetime import datetime # print("\n# ------------------------------------------------------------------------------------") # datetimestamp = datetime.now() # print(f'\ndate/time: {datetimestamp}') from drumscript.audio_processor.audio_loader import load_audio, normalise_audio from drumscript.notation_generator.constants import SAMPLE_RATE parser = argparse.ArgumentParser(description="Estimate the tempo of an audio file.") # parser.add_argument("audio_file_path", type=str, help="Path to the audio file to be processed.") parser.add_argument("audio_path", type=str, help="Path to the audio file to be processed.") args = parser.parse_args() # actual_drum_recording_path = args.audio_file_path # audio_file_path, relative to ROOT, not the path of this script audio_path = args.audio_path # audio_path, relative to ROOT, not the path of this script sr = SAMPLE_RATE try: # Load and normalise the audio # print(f"Attempting to load: {actual_drum_recording_path}") print(f"Attempting to load: {audio_path}") # audio, sr = load_audio(actual_drum_recording_path, sr=44100) # audio, sr = load_audio(actual_drum_recording_path, sr=sr) audio, sr = load_audio(audio_path, sr=sr) normalised_audio = normalise_audio(audio) # Estimate the tempo bpm = estimate_tempo(normalised_audio, sr) print(f"Estimated Tempo: {int(round(bpm))} BPM") except Exception as e: print(f"\nAn unexpected error occurred: {e}") # print("\n#==================================================================================================") # ------------------------------------------------------------------------------------------------------