# DrumScript/audio_processor/tempo_detector.py
# ------------------------------------------------------------------------------------------------------------
"""
This module contains functions for automatic tempo detection from audio data.
"""
# Import packages: ------------------------------------------------------------------------------------------------
import argparse
import librosa
import numpy as np
from drumscript.notation_generator.constants import SAMPLE_RATE
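# Functions: -------------------------------------------------------------------------------------------------------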
def estimate_tempo(audio_path, sr):
    """
    Estimates tempo from the tempogram, restricted to a plausible range.

    The onset-strength envelope of the signal is converted to a tempogram, the
    tempogram is summed along axis 1, and the strongest bin whose tempo lies
    between 60 and 240 BPM (inclusive) is returned. Restricting the search to
    that range avoids the INF bin and extreme BPM errors (e.g. 10500 BPM).

    :param audio_path: The input audio time series (despite the name, this is
        the sample array, not a file path).
    :type audio_path: np.ndarray
    :param sr: Sampling rate of the audio.
    :type sr: int
    :return: The estimated tempo in Beats Per Minute (BPM). Returns 0.0 for an
        empty array, and a default of 120.0 when the audio is shorter than one
        second or when no tempo bin falls inside the plausible range.
    :rtype: float
    """
    if audio_path.size == 0:
        return 0.0

    # Calculating tempo on clips shorter than ~1-2 seconds is unreliable and often
    # produces artifacts (like 235 BPM for a single kick), so fall back to a default.
    duration_seconds = audio_path.shape[0] / sr
    if duration_seconds < 1.0:
        print(f"Audio too short for tempo detection ({duration_seconds:.2f}s). Defaulting to 120 BPM.")
        return 120.0

    # The same sr and hop_length must be given to every librosa call below:
    # tempo_frequencies converts lag bins to BPM from sr / hop_length, and librosa
    # falls back to sr=22050 when it is omitted, which mis-scales the BPM axis for
    # audio recorded at any other sampling rate.
    hop_length = 512
    oenv = librosa.onset.onset_strength(y=audio_path, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, hop_length=hop_length)
    tempo_spectrum = np.sum(tempogram, axis=1)
    tempo_freqs = librosa.tempo_frequencies(tempogram.shape[0], hop_length=hop_length, sr=sr)

    # Only consider tempos in a plausible musical range (60-240 BPM). This also
    # drops the first bin, whose tempo is infinite (zero lag).
    plausible_tempos_mask = (tempo_freqs >= 60) & (tempo_freqs <= 240)
    plausible_spectrum = tempo_spectrum[plausible_tempos_mask]
    if plausible_spectrum.size == 0:
        return 120.0  # Return default if no bin lies in the plausible range

    # Locate the peak inside the masked spectrum and map it back to a BPM value.
    peak_idx_in_plausible_range = np.argmax(plausible_spectrum)
    plausible_tempo_freqs = tempo_freqs[plausible_tempos_mask]
    # Convert the NumPy scalar to a built-in float so every return path has the same type.
    return float(plausible_tempo_freqs[peak_idx_in_plausible_range])
# =====================================================================================================
# MAIN BLOCK - for local testing of this function
if __name__ == "__main__":
    # Command-line harness: load one audio file, normalise it, and print its estimated tempo.
    from drumscript.audio_processor.audio_loader import load_audio, normalise_audio
    from drumscript.notation_generator.constants import SAMPLE_RATE

    cli = argparse.ArgumentParser(description="Estimate the tempo of an audio file.")
    cli.add_argument("audio_path", type=str, help="Path to the audio file to be processed.")
    # Per the original note, the path is relative to ROOT, not to this script's location.
    audio_path = cli.parse_args().audio_path

    try:
        print(f"Attempting to load: {audio_path}")
        # load_audio is asked for SAMPLE_RATE and hands back the rate it actually used.
        samples, loaded_sr = load_audio(audio_path, sr=SAMPLE_RATE)
        bpm = estimate_tempo(normalise_audio(samples), loaded_sr)
        print(f"Estimated Tempo: {int(round(bpm))} BPM")
    except Exception as e:
        # Top-level boundary of a test script: report the failure instead of a traceback.
        print(f"\nAn unexpected error occurred: {e}")
# print("\n#==================================================================================================")
# ------------------------------------------------------------------------------------------------------