Source code for drumscript.audio_processor.onset_detector

# DrumScript/audio_processor/onset_detector.py
"""
This module will detect the onset (start) times of drum hits in the audio.
"""

import argparse
import os

import librosa
import numpy as np

from drumscript.audio_processor.tempo_detector import estimate_tempo
from drumscript.notation_generator.constants import HOP_LENGTH, SAMPLE_RATE


[docs] def detect_onsets(audio_data: np.ndarray, sr: int) -> list[float]: # Detects the onset (start) times of percussive events in an audio signal. # This function uses librosa's built-in onset detection algorithms, which # typically rely on spectral flux or other energy-based methods to identify # sudden changes in the audio signal characteristic of percussive hits. # Args: # audio_data (np.ndarray): The input audio time series. # sr (int): The sample rate of the audio data. # Returns: # list[float]: A list of detected onset times in seconds. if audio_data.size == 0: return [] y_percussive = librosa.effects.percussive(y=audio_data) # --- ENFORCE PHYSICAL DRUMMING LIMITS --- # Because HOP_LENGTH is 128, the frames are very tiny (~2.9ms). # We must explicitly tell Librosa to wait at least 50ms before triggering # a second hit, otherwise it will trigger on cymbal vibrations. lockout_time_secs = 0.05 # wait_frames = int(lockout_time_secs * (SAMPLE_RATE / HOP_LENGTH)) wait_frames = int(lockout_time_secs * (sr / HOP_LENGTH)) print(f"(HOP_LENGTH: {HOP_LENGTH})") print(f"(Wait Frames Applied: {wait_frames})") # Pass the 'wait' and 'delta' constraints directly into the simple wrapper onset_frames = librosa.onset.onset_detect( y=y_percussive, # sr=SAMPLE_RATE, sr=sr, hop_length=HOP_LENGTH, units="frames", wait=wait_frames, # Stops rapid double-triggering on cymbals delta=0.05, # Ignores general background noise floor ) # Commenting out to test single_beat_refinement :) # print(f'(len_onset_frames:{len(onset_frames)})') # Convert onset frames to time in seconds # onset_times = librosa.frames_to_time( # onset_frames, # sr=SAMPLE_RATE, # hop_length=HOP_LENGTH # ) # print(f'(len_onset_times:{len(onset_times)})') # return onset_times.tolist() # Convert onset frames to time in seconds # onset_times = librosa.frames_to_time(onset_frames, sr=SAMPLE_RATE, hop_length=HOP_LENGTH).tolist() onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=HOP_LENGTH).tolist() # --- SINGLE-BEAT REFINEMENT --- # If the total duration is very short (< 2.0s), it's likely a single hit sample. # We apply a stricter "De-bounce" to prevent room reflections from triggering events. duration = len(audio_data) / sr # if duration < 1.0 and len(onset_times) > 1: if duration < 2.0 and len(onset_times) > 1: # We only keep the FIRST onset if others follow too closely (within 150ms) # This fixes the 'single hit showing multiple events' issue in orchestration folder. refined_onsets = [onset_times[0]] for i in range(1, len(onset_times)): if onset_times[i] - refined_onsets[-1] > 0.150: # 150ms threshold refined_onsets.append(onset_times[i]) onset_times = refined_onsets print(f"(len_onset_times:{len(onset_times)})") return onset_times
# ------- AUTOMATIC TEMPO DETECTION------------------------------------ # REPLACED THE FUNCTION THAT WAS HARDCODED TO DETECT TEMPO FROM ONSETS WITH IMPORTED FCT FROM THE TEMPO_DETECTOR SCRIPT """ def calculate_tempo_from_onsets(onset_times: np.ndarray, sr: int) -> float: # #Estimates the tempo (BPM) from a list of onset timestamps. # :param onset_times: Array of onset timestamps in seconds. #:type onset_times: np.ndarray #:param sr: The sampling rate. #:type sr: int #:return: The estimated tempo in BPM. #:rtype: float # if len(onset_times) < 2: return 120.0 # Return a default tempo if not enough onsets are found # FIX: librosa.beat.tempo does NOT accept 'onset_events' without audio/envelope. # We calculate tempo using Inter-Onset Intervals (IOI). # 1. Calculate the time difference between consecutive hits ioi = np.diff(onset_times) # 2. Filter out extremely short or long gaps (e.g., fast rolls or long pauses) # 0.2s = 300 BPM, 1.5s = 40 BPM valid_ioi = ioi[(ioi > 0.2) & (ioi < 1.5)] if len(valid_ioi) > 0: # 3. Take the median interval (median is better than average as it ignores outliers) avg_interval = np.median(valid_ioi) tempo = 60.0 / avg_interval else: tempo = 120.0 # Default if pattern is weird or too sparse return float(tempo) """ if __name__ == "__main__": from drumscript.audio_processor.audio_loader import load_audio, normalise_audio from drumscript.audio_processor.tempo_detector import estimate_tempo # --------------------------------------------------------------------------uncomment during testing # from datetime import datetime # print("\n# ------------------------------------------------------------------------------------") # datetimestamp = datetime.now() # print(f'\ndate/time: {datetimestamp}') # -------------------------------------------------------------------------------------------------- print("Running onset_detector.py example with provided filepath...") # FUTURE: Find way to encode this so it prints the file path provided in CLI try: # Import necessary modules from package # Note: You might need 'from DrumScript.audio_processor.audio_loader import ...' # if running this script directly and 'audio_processor' is not in the Python path. # However, for 'python -m' style execution, 'from audio_processor.audio_loader import ...' is usually correct. # Required for CLI argparsing parser = argparse.ArgumentParser(description="Detect onsets in drum audio.") parser.add_argument("audio_path", help="Path to the input audio file") args = parser.parse_args() # sr = 44100 # Target sample rate for processing # sr = 44100*1.5 # Target sample rate for processing sr = SAMPLE_RATE print(f"sample_rate=sr={sr}") # Print current sample rate applied # --- Get path to audio (cli or import from sister module) current_script_dir = os.path.dirname(os.path.abspath(__file__)) print(f"current_script_dir: {current_script_dir}") # Go up one level from audio_processor/onset_detector.py to the outer DRUMSCRIPT/ folder project_root = os.path.abspath(os.path.join(current_script_dir, "..", "..")) print(f"project_root: {project_root}") audio_path = os.path.abspath(args.audio_path) print(f"audio_path: {audio_path}") print(f"Attempting to load: {audio_path}") # Load and normalise audio # audio_data, sample_rate = load_audio(audio_path, sr=sr) audio_data, sample_rate = load_audio(audio_path, sr=SAMPLE_RATE) normalised_audio = normalise_audio(audio_data) # Detect onsets print(f"Detecting onsets in : {audio_path}") onsets = detect_onsets(normalised_audio, SAMPLE_RATE) print(f"Detected {len(onsets)} onsets.") if onsets: # Print the first few detected onsets for verification # print("\nFirst 10 detected onsets (seconds):") # for i, onset_time in enumerate(onsets[:10]): # if len(onsets) > 10: # print(f" ...and {len(onsets) - 10} more onsets.") # Print * (ALL) detected onsets for now print(f"\n All {len(onsets)} detected onsets (seconds):") for i, onset_time in enumerate(onsets): print(f" Onset {i + 1}: {onset_time:.4f}s") # for i, onset_time in enumerate(onsets): # print(f" Onset {i+1}: {onsets:.4f}s") else: print(f"No onsets detected in audio_path: {audio_path}") # global_tempo = estimate_tempo(audio_data, SAMPLE_RATE, HOP_LENGTH) # tempo = estimate_tempo(audio_data, SAMPLE_RATE, HOP_LENGTH) tempo = estimate_tempo(audio_data, SAMPLE_RATE) # tempo = estimate_tempo(audio_data, SAMPLE_RATE)/2 # temporary fix # tempo = estimate_tempo(audio_data, SAMPLE_RATE)/4 # temporary fix # print(f"Loaded audio: Shape={normalised_audio.shape}, Sample Rate={sample_rate}, Duration={len(normalised_audio)/sample_rate:.2f} seconds, # Tempo={calculate_tempo_from_onsets(onsets, sr=SAMPLE_RATE):2f}") duration = len(normalised_audio) / sample_rate print( f"Loaded audio: Shape=[{normalised_audio.shape}, Sample Rate={sample_rate} (Hz), " f"Hop Length={HOP_LENGTH} (Hz), Duration={duration:.2f} seconds, Tempo={tempo:.2f} BPM]" ) except FileNotFoundError: print(f"\nERROR: The audio file '{audio_path}' was not found.") print(f"\nPlease ensure you have provided the correct path to audio file: {audio_path}") except ImportError as e: print(f"\nERROR: Required modules/libraries might be missing or imports are incorrect: {e}") print("Ensure 'soundfile', 'librosa', 'numpy', and DrumScript modules are correctly installed and structured.") print("For MP3, 'ffmpeg' must also be installed on system and accessible in PATH.") except Exception as e: print(f"\nAn unexpected error occurred during the example execution: {e}") import traceback traceback.print_exc() # Print full traceback for debugging print("\nonset_detector.py example finished.") # --------------------------------------------------------------------------uncomment during testing # from datetime import datetime # print("\n# ------------------------------------------------------------------------------------") # datetimestamp = datetime.now() # print(f'\ndate/time: {datetimestamp}') # --------------------------------------------------------------------------------------------------