# Drum-kit FX optimization: fits lowpass/highpass/distortion/bitcrush/reverb
# parameters to a text prompt by maximizing CLAP text-audio similarity with
# Bayesian optimization (scikit-optimize).
from inference import get_clap_embeddings_from_audio, get_clap_embeddings_from_text
from pedalboard import Pedalboard, Reverb, HighpassFilter, LowpassFilter, Distortion, Bitcrush
from sklearn.metrics.pairwise import cosine_similarity
import soundfile as sf
from skopt import gp_minimize
from skopt.space import Real
import librosa
import numpy as np
import os
def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
    """Stitch together all drum sounds into one audio file.

    Args:
        drum_kit: Mapping of instrument name -> list of sample file paths.
        output_path: Where to write the concatenated WAV.

    Returns:
        The path of the written WAV file.

    Raises:
        ValueError: If the kit contains no samples at all.
    """
    sr = 48000
    all_audio = []
    for samples in drum_kit.values():
        for sample in samples:
            # Resample everything to the common rate so concatenation is valid.
            audio, _ = librosa.load(sample, sr=sr)
            all_audio.append(audio)
    if not all_audio:
        # np.concatenate on an empty list raises a cryptic error; fail clearly.
        raise ValueError("drum_kit contains no samples to concatenate")
    # 200 ms of silence between consecutive sounds (not after the last one).
    gap = np.zeros(int(sr * 0.2))
    pieces = []
    for audio in all_audio:
        pieces.append(audio)
        pieces.append(gap)
    full_audio = np.concatenate(pieces[:-1])  # drop the trailing gap
    # Save to temp file
    sf.write(output_path, full_audio, sr)
    return output_path
def evaluate_fitness(audio_path, text_embed):
    """Score how well the audio at *audio_path* matches the text query.

    Embeds the audio with CLAP and returns its cosine similarity to the
    precomputed text embedding (higher = better match).
    """
    audio_embed = get_clap_embeddings_from_audio(audio_path)
    similarity_matrix = cosine_similarity([text_embed], [audio_embed])
    return similarity_matrix[0][0]
def apply_fx(audio_path, params, write_wav=True, output_dir="processed_audio"):
    """Apply EQ and Reverb to an audio file and return the modified file path.

    Args:
        audio_path: Path to the input audio file.
        params: Dict with keys 'lowpass', 'highpass', 'drive_db',
            'bit_depth', 'reverb_size', 'reverb_wet'.
        write_wav: If True, write the processed audio to disk and return its
            path; if False, return the processed samples directly.
        output_dir: Directory name (created under the dataset root) for
            processed files.

    Returns:
        Output file path (write_wav=True) or the processed audio array.
    """
    audio, sr = librosa.load(audio_path, sr=48000)
    board = Pedalboard([
        LowpassFilter(cutoff_frequency_hz=params['lowpass']),
        HighpassFilter(cutoff_frequency_hz=params['highpass']),
        Distortion(drive_db=params['drive_db']),
        Bitcrush(bit_depth=params['bit_depth']),
        Reverb(room_size=params['reverb_size'], wet_level=params['reverb_wet'])
    ])
    processed_audio = board(audio, sr)
    if not write_wav:
        return processed_audio
    # Determine output directory dynamically: two levels up from the sample,
    # i.e. the 'dataset' level — TODO confirm against the caller's layout.
    base_dir = os.path.dirname(os.path.dirname(audio_path))
    out_dir = os.path.join(base_dir, output_dir)
    os.makedirs(out_dir, exist_ok=True)
    # splitext handles any extension/case; str.replace(".wav", ...) would
    # corrupt names containing ".wav" mid-string and miss ".WAV"/".flac".
    stem, _ = os.path.splitext(os.path.basename(audio_path))
    output_path = os.path.join(out_dir, stem + "_processed.wav")
    sf.write(output_path, processed_audio, sr)
    return output_path
def objective_function(params, audio_file, text_embedding):
    """Objective for Bayesian optimization over the concatenated file.

    Applies the candidate FX settings, scores the result against the text
    embedding, and returns the negated similarity (gp_minimize minimizes).
    """
    # Positional order must match the search_space definition.
    names = ("lowpass", "highpass", "reverb_size", "reverb_wet",
             "drive_db", "bit_depth")
    fx_settings = dict(zip(names, params))
    processed = apply_fx(audio_file, fx_settings, write_wav=True)
    return -evaluate_fitness(processed, text_embedding)
def get_params_dict(params_list):
    """Pair the ordered optimizer result with human-readable FX labels."""
    labels = (
        "lowpass cutoff (Hz)",
        "highpass cutoff (Hz)",
        "reverb size",
        "reverb mix",
        "distortion - gain_db",
        "bitcrush - bit depth",
    )
    return dict(zip(labels, params_list))
# Define parameter search space
# Bounds explored by gp_minimize. Positional order is part of the contract:
# it must match the unpacking in objective_function and get_fx.
search_space = [
    Real(5000, 15000, name="lowpass"),    # low-pass cutoff (Hz)
    Real(50, 1000, name="highpass"),      # high-pass cutoff (Hz)
    Real(0.0, 0.8, name="reverb_size"),   # reverb room size (0-1 scale)
    Real(0.0, 1.0, name="reverb_wet"),    # reverb wet mix
    Real(0.0, 20.0, name="drive_db"),     # distortion drive (dB)
    Real(6.0, 32.0, name="bit_depth")     # bitcrusher depth (bits)
]
##### Main function #####
def get_fx(drum_kit, fx_prompt):
    """Optimize FX settings for the whole kit via a concatenated audio file.

    Args:
        drum_kit: Mapping of instrument name -> list of sample paths.
        fx_prompt: Text description of the desired sound.

    Returns:
        Tuple of (optimized_kit, params_summary) where optimized_kit maps
        each instrument to its processed file paths and params_summary is a
        labeled dict of the best parameters found.
    """
    text_embedding = get_clap_embeddings_from_text(fx_prompt)
    # One concatenated file lets a single optimization cover the whole kit.
    concat_file = concatenate_sounds(drum_kit)

    def obj_func(candidate):
        return objective_function(candidate, concat_file, text_embedding)

    # Bayesian optimization over the shared search space.
    result = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
    best = result.x
    # Build the settings dict once; apply_fx does not mutate it.
    best_settings = {
        "lowpass": best[0],
        "highpass": best[1],
        "reverb_size": best[2],
        "reverb_wet": best[3],
        "drive_db": best[4],
        "bit_depth": best[5],
    }
    # Render every individual sample with the winning parameters.
    optimized_kit = {
        instrument: [apply_fx(sample, best_settings, write_wav=True)
                     for sample in samples]
        for instrument, samples in drum_kit.items()
    }
    return optimized_kit, get_params_dict(best)