Update appcud12.py
Browse filesbugs fixing cuda 12 reworking
- appcud12.py +178 -62
appcud12.py
CHANGED
@@ -12,24 +12,42 @@ from torch.cuda.amp import autocast
|
|
12 |
import warnings
|
13 |
import random
|
14 |
import traceback
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# Suppress warnings for cleaner output
|
17 |
warnings.filterwarnings("ignore")
|
18 |
|
19 |
# Set PYTORCH_CUDA_ALLOC_CONF for CUDA 12
|
20 |
-
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:
|
21 |
|
22 |
# Optimize for CUDA 12
|
23 |
torch.backends.cudnn.benchmark = False
|
24 |
torch.backends.cudnn.deterministic = True
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
# Device setup
|
27 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
28 |
if device != "cuda":
|
29 |
-
|
30 |
sys.exit(1)
|
31 |
-
|
32 |
-
|
33 |
|
34 |
# Memory cleanup function
|
35 |
def clean_memory():
|
@@ -37,27 +55,31 @@ def clean_memory():
|
|
37 |
gc.collect()
|
38 |
torch.cuda.ipc_collect()
|
39 |
torch.cuda.synchronize()
|
40 |
-
|
|
|
|
|
|
|
41 |
|
42 |
# Pre-run memory cleanup
|
43 |
clean_memory()
|
44 |
|
45 |
# Load MusicGen medium model into VRAM
|
46 |
try:
|
47 |
-
|
48 |
local_model_path = "./models/musicgen-medium"
|
49 |
if not os.path.exists(local_model_path):
|
50 |
-
|
51 |
-
|
52 |
sys.exit(1)
|
53 |
musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
|
54 |
musicgen_model.set_generation_params(
|
55 |
duration=30, # Strict 30s max per chunk
|
56 |
two_step_cfg=False
|
57 |
)
|
58 |
-
|
59 |
except Exception as e:
|
60 |
-
|
|
|
61 |
sys.exit(1)
|
62 |
|
63 |
# Check disk space
|
@@ -65,11 +87,12 @@ def check_disk_space(path="."):
|
|
65 |
stat = os.statvfs(path)
|
66 |
free_space = stat.f_bavail * stat.f_frsize / (1024**3) # Free space in GB
|
67 |
if free_space < 1.0:
|
68 |
-
|
69 |
return free_space >= 1.0
|
70 |
|
71 |
# Audio processing functions (CPU-based)
|
72 |
-
def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=
|
|
|
73 |
samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
|
74 |
if audio_segment.channels == 2:
|
75 |
stereo_samples = samples.reshape(-1, 2)
|
@@ -91,41 +114,54 @@ def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=22050):
|
|
91 |
sample_width=audio_segment.sample_width,
|
92 |
channels=2
|
93 |
)
|
|
|
94 |
return balanced_segment
|
|
|
95 |
return audio_segment
|
96 |
|
97 |
def calculate_rms(segment):
|
98 |
samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
|
99 |
-
|
|
|
|
|
100 |
|
101 |
-
def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=
|
|
|
102 |
target_rms = 10 ** (target_rms_db / 20) * 32767
|
103 |
current_rms = calculate_rms(segment)
|
104 |
if current_rms > 0:
|
105 |
gain_factor = target_rms / current_rms
|
106 |
segment = segment.apply_gain(20 * np.log10(gain_factor))
|
107 |
segment = hard_limit(segment, limit_db=peak_limit_db, sample_rate=sample_rate)
|
|
|
108 |
return segment
|
109 |
|
110 |
-
def hard_limit(audio_segment, limit_db=-3.0, sample_rate=
|
|
|
111 |
limit = 10 ** (limit_db / 20.0) * 32767
|
112 |
samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
|
113 |
samples = np.clip(samples, -limit, limit).astype(np.int16)
|
114 |
-
|
115 |
samples.tobytes(),
|
116 |
frame_rate=sample_rate,
|
117 |
sample_width=audio_segment.sample_width,
|
118 |
channels=audio_segment.channels
|
119 |
)
|
|
|
|
|
120 |
|
121 |
-
def apply_eq(segment, sample_rate=
|
|
|
122 |
segment = segment.high_pass_filter(20)
|
123 |
segment = segment.low_pass_filter(20000)
|
|
|
124 |
return segment
|
125 |
|
126 |
def apply_fade(segment, fade_in_duration=500, fade_out_duration=500):
|
|
|
127 |
segment = segment.fade_in(fade_in_duration)
|
128 |
segment = segment.fade_out(fade_out_duration)
|
|
|
129 |
return segment
|
130 |
|
131 |
# Genre prompt functions
|
@@ -135,7 +171,9 @@ def set_red_hot_chili_peppers_prompt(bpm, drum_beat, synthesizer, rhythmic_steps
|
|
135 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
136 |
bass = f", {bass_style}" if bass_style != "none" else ", groovy basslines"
|
137 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", syncopated guitar riffs"
|
138 |
-
|
|
|
|
|
139 |
|
140 |
def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
141 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("intense rhythmic steps" if bpm > 120 else "grungy rhythmic pulse")
|
@@ -143,7 +181,9 @@ def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_
|
|
143 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
144 |
bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
|
145 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", raw distorted guitar riffs"
|
146 |
-
|
|
|
|
|
147 |
|
148 |
def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
149 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "driving rhythmic flow")
|
@@ -151,7 +191,9 @@ def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bas
|
|
151 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
152 |
bass = f", {bass_style}" if bass_style != "none" else ", deep bass"
|
153 |
guitar = f", {guitar_style} guitar leads" if guitar_style != "none" else ", soulful guitar leads"
|
154 |
-
|
|
|
|
|
155 |
|
156 |
def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
157 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("heavy rhythmic steps" if bpm > 120 else "sludgy rhythmic groove")
|
@@ -159,7 +201,9 @@ def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, b
|
|
159 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
160 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
161 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", heavy sludgy guitar riffs"
|
162 |
-
|
|
|
|
|
163 |
|
164 |
def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
165 |
styles = ["anthemic", "gritty", "melodic", "fast-paced", "driving"]
|
@@ -173,7 +217,9 @@ def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_st
|
|
173 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
174 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
175 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else f", {style} guitar riffs"
|
176 |
-
|
|
|
|
|
177 |
|
178 |
def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
179 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("dynamic rhythmic steps" if bpm > 120 else "dreamy rhythmic flow")
|
@@ -181,7 +227,9 @@ def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, ba
|
|
181 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
182 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
183 |
guitar = f", {guitar_style} guitar textures" if guitar_style != "none" else ", dreamy guitar textures"
|
184 |
-
|
|
|
|
|
185 |
|
186 |
def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
187 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("complex rhythmic steps" if bpm > 120 else "intricate rhythmic pulse")
|
@@ -189,7 +237,9 @@ def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style
|
|
189 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ", atmospheric synths"
|
190 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
191 |
guitar = f", {guitar_style} guitar layers" if guitar_style != "none" else ", intricate guitar layers"
|
192 |
-
|
|
|
|
|
193 |
|
194 |
def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
195 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("bluesy rhythmic steps" if bpm > 120 else "steady rhythmic groove")
|
@@ -197,7 +247,9 @@ def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_st
|
|
197 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
198 |
bass = f", {bass_style}" if bass_style != "none" else ", groovy bass"
|
199 |
guitar = f", {guitar_style} electric guitars" if guitar_style != "none" else ", bluesy electric guitars"
|
200 |
-
|
|
|
|
|
201 |
|
202 |
def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
203 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("quirky rhythmic steps" if bpm > 120 else "energetic rhythmic flow")
|
@@ -205,7 +257,9 @@ def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bas
|
|
205 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
206 |
bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
|
207 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", distorted guitar riffs"
|
208 |
-
|
|
|
|
|
209 |
|
210 |
def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
211 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("sharp rhythmic steps" if bpm > 120 else "moody rhythmic pulse")
|
@@ -213,7 +267,9 @@ def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style
|
|
213 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
214 |
bass = f", {bass_style}" if bass_style != "none" else ", driving basslines"
|
215 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
|
216 |
-
|
|
|
|
|
217 |
|
218 |
def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
219 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("catchy rhythmic steps" if bpm > 120 else "jangly rhythmic flow")
|
@@ -221,7 +277,9 @@ def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_styl
|
|
221 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
222 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
223 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
|
224 |
-
|
|
|
|
|
225 |
|
226 |
def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
227 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("aggressive rhythmic steps" if bpm > 120 else "funky rhythmic groove")
|
@@ -229,7 +287,9 @@ def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style
|
|
229 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
230 |
bass = f", {bass_style}" if bass_style != "none" else ", slap bass"
|
231 |
guitar = f", {guitar_style} guitar chords" if guitar_style != "none" else ", funky guitar chords"
|
232 |
-
|
|
|
|
|
233 |
|
234 |
def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
235 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("pulsing rhythmic steps" if bpm > 120 else "deep rhythmic groove")
|
@@ -237,7 +297,9 @@ def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_
|
|
237 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ", deep pulsing synths with a repetitive, hypnotic pattern"
|
238 |
bass = f", {bass_style}" if bass_style != "none" else ", driving basslines with a consistent, groovy pulse"
|
239 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
|
240 |
-
|
|
|
|
|
241 |
|
242 |
def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
243 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "laid-back rhythmic flow")
|
@@ -245,31 +307,49 @@ def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_styl
|
|
245 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ", warm analog synth chords with a repetitive, hypnotic progression"
|
246 |
bass = f", {bass_style}" if bass_style != "none" else ", deep basslines with a consistent, groovy pulse"
|
247 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
|
248 |
-
|
|
|
|
|
249 |
|
250 |
# Preset configurations for genres (optimized for medium model)
|
251 |
PRESETS = {
|
252 |
-
"default": {"cfg_scale": 2.
|
253 |
-
"rock": {"cfg_scale":
|
254 |
-
"techno": {"cfg_scale":
|
255 |
-
"grunge": {"cfg_scale": 2.
|
256 |
-
"indie": {"cfg_scale": 2.
|
257 |
}
|
258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
# Optimized generation function
|
260 |
def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, vram_status: str):
|
261 |
global musicgen_model
|
262 |
if not instrumental_prompt.strip():
|
|
|
263 |
return None, "β οΈ Please enter a valid instrumental prompt!", vram_status
|
264 |
try:
|
265 |
-
|
266 |
start_time = time.time()
|
267 |
max_duration = 30 # Strict 30s max per chunk
|
268 |
total_duration = min(max(total_duration, 30), 120) # Clamp between 30s and 120s
|
269 |
-
processing_sample_rate =
|
270 |
output_sample_rate = 32000 # MusicGen's native rate
|
271 |
audio_segments = []
|
272 |
-
overlap_duration = 0.
|
273 |
remaining_duration = total_duration
|
274 |
|
275 |
if preset != "default":
|
@@ -278,11 +358,13 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
278 |
top_k = preset_params["top_k"]
|
279 |
top_p = preset_params["top_p"]
|
280 |
temperature = preset_params["temperature"]
|
|
|
281 |
|
282 |
if not check_disk_space():
|
|
|
283 |
return None, "β οΈ Insufficient disk space. Free up at least 1 GB.", vram_status
|
284 |
|
285 |
-
|
286 |
seed = 42
|
287 |
base_prompt = instrumental_prompt
|
288 |
clean_memory()
|
@@ -291,8 +373,9 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
291 |
while remaining_duration > 0:
|
292 |
current_duration = min(max_duration, remaining_duration)
|
293 |
generation_duration = current_duration # No overlap in generation
|
|
|
|
|
294 |
|
295 |
-
print(f"Generating chunk ({current_duration}s, VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB)...")
|
296 |
musicgen_model.set_generation_params(
|
297 |
duration=generation_duration,
|
298 |
use_sampling=True,
|
@@ -308,18 +391,28 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
308 |
torch.manual_seed(seed)
|
309 |
np.random.seed(seed)
|
310 |
torch.cuda.manual_seed_all(seed)
|
|
|
311 |
if not audio_segments:
|
|
|
312 |
audio_segment = musicgen_model.generate([base_prompt], progress=True)[0].cpu()
|
313 |
else:
|
|
|
314 |
prev_segment = audio_segments[-1]
|
315 |
prev_segment = balance_stereo(prev_segment, noise_threshold=-60, sample_rate=processing_sample_rate)
|
316 |
temp_wav_path = f"temp_prev_{int(time.time()*1000)}.wav"
|
|
|
317 |
prev_segment.export(temp_wav_path, format="wav")
|
318 |
-
|
|
|
|
|
|
|
|
|
319 |
if prev_sr != processing_sample_rate:
|
|
|
320 |
prev_audio = torchaudio.transforms.Resample(prev_sr, processing_sample_rate)(prev_audio)
|
321 |
prev_audio = prev_audio.to(device)
|
322 |
os.remove(temp_wav_path)
|
|
|
323 |
audio_segment = musicgen_model.generate_continuation(
|
324 |
prompt=prev_audio[:, -int(processing_sample_rate * overlap_duration):],
|
325 |
prompt_sample_rate=processing_sample_rate,
|
@@ -329,24 +422,32 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
329 |
del prev_audio
|
330 |
clean_memory()
|
331 |
except Exception as e:
|
332 |
-
|
|
|
333 |
raise e
|
334 |
|
|
|
335 |
audio_segment = audio_segment.to(dtype=torch.float32)
|
336 |
if audio_segment.dim() == 1:
|
|
|
337 |
audio_segment = torch.stack([audio_segment, audio_segment], dim=0)
|
338 |
elif audio_segment.dim() == 2 and audio_segment.shape[0] != 2:
|
|
|
339 |
audio_segment = torch.cat([audio_segment, audio_segment], dim=0)
|
340 |
|
341 |
if audio_segment.shape[0] != 2:
|
|
|
342 |
raise ValueError(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")
|
343 |
|
344 |
temp_wav_path = f"temp_audio_{int(time.time()*1000)}.wav"
|
|
|
345 |
torchaudio.save(temp_wav_path, audio_segment, output_sample_rate, bits_per_sample=16)
|
346 |
segment = AudioSegment.from_wav(temp_wav_path)
|
347 |
os.remove(temp_wav_path)
|
|
|
348 |
segment = segment - 15
|
349 |
if segment.frame_rate != processing_sample_rate:
|
|
|
350 |
segment = segment.set_frame_rate(processing_sample_rate)
|
351 |
segment = balance_stereo(segment, noise_threshold=-60, sample_rate=processing_sample_rate)
|
352 |
segment = rms_normalize(segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
|
@@ -355,11 +456,11 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
355 |
|
356 |
del audio_segment
|
357 |
clean_memory()
|
358 |
-
vram_status = f"VRAM after chunk: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
|
359 |
time.sleep(0.1)
|
360 |
remaining_duration -= current_duration
|
361 |
|
362 |
-
|
363 |
final_segment = audio_segments[0][:min(max_duration, total_duration) * 1000]
|
364 |
overlap_ms = int(overlap_duration * 1000)
|
365 |
|
@@ -368,6 +469,7 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
368 |
current_segment = current_segment[:min(max_duration, total_duration - (i * max_duration)) * 1000]
|
369 |
|
370 |
if overlap_ms > 0 and len(current_segment) > overlap_ms:
|
|
|
371 |
prev_overlap = final_segment[-overlap_ms:]
|
372 |
curr_overlap = current_segment[:overlap_ms]
|
373 |
num_samples = len(np.array(prev_overlap.get_array_of_samples(), dtype=np.float32)) // 2
|
@@ -387,10 +489,11 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
387 |
blended_segment = rms_normalize(blended_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
|
388 |
final_segment = final_segment[:-overlap_ms] + blended_segment + current_segment[overlap_ms:]
|
389 |
else:
|
|
|
390 |
final_segment += current_segment
|
391 |
|
392 |
final_segment = final_segment[:total_duration * 1000]
|
393 |
-
|
394 |
final_segment = rms_normalize(final_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
|
395 |
final_segment = apply_eq(final_segment, sample_rate=processing_sample_rate)
|
396 |
final_segment = apply_fade(final_segment)
|
@@ -399,39 +502,42 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
399 |
final_segment = final_segment.set_frame_rate(output_sample_rate) # Upsample to output rate
|
400 |
|
401 |
mp3_path = f"output_adjusted_volume_{int(time.time())}.mp3"
|
402 |
-
|
403 |
-
|
404 |
try:
|
|
|
405 |
final_segment.export(
|
406 |
mp3_path,
|
407 |
format="mp3",
|
408 |
bitrate="96k",
|
409 |
tags={"title": "GhostAI Instrumental", "artist": "GhostAI"}
|
410 |
)
|
411 |
-
|
412 |
except Exception as e:
|
413 |
-
|
414 |
fallback_path = f"fallback_output_{int(time.time())}.mp3"
|
415 |
try:
|
416 |
final_segment.export(fallback_path, format="mp3", bitrate="96k")
|
417 |
-
|
418 |
mp3_path = fallback_path
|
419 |
except Exception as fallback_e:
|
420 |
-
|
421 |
raise e
|
422 |
|
423 |
vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
|
|
|
424 |
return mp3_path, "β
Done! Generated static-free track with adjusted volume levels.", vram_status
|
425 |
except Exception as e:
|
426 |
-
|
427 |
-
|
428 |
return None, f"β Generation failed: {e}", vram_status
|
429 |
finally:
|
430 |
clean_memory()
|
431 |
|
432 |
# Clear inputs function
|
433 |
def clear_inputs():
|
434 |
-
|
|
|
435 |
|
436 |
# Custom CSS
|
437 |
css = """
|
@@ -459,7 +565,7 @@ p {
|
|
459 |
color: #E0E0E0;
|
460 |
font-size: 12px;
|
461 |
}
|
462 |
-
.input-container, .settings-container, .output-container {
|
463 |
max-width: 1200px;
|
464 |
margin: 20px auto;
|
465 |
padding: 20px;
|
@@ -510,7 +616,7 @@ p {
|
|
510 |
"""
|
511 |
|
512 |
# Build Gradio interface
|
513 |
-
|
514 |
with gr.Blocks(css=css) as demo:
|
515 |
gr.Markdown("""
|
516 |
<div class="header-container">
|
@@ -551,7 +657,7 @@ with gr.Blocks(css=css) as demo:
|
|
551 |
label="CFG Scale π―",
|
552 |
minimum=1.0,
|
553 |
maximum=10.0,
|
554 |
-
value=2.
|
555 |
step=0.1,
|
556 |
info="Controls how closely the music follows the prompt."
|
557 |
)
|
@@ -559,7 +665,7 @@ with gr.Blocks(css=css) as demo:
|
|
559 |
label="Top-K Sampling π’",
|
560 |
minimum=10,
|
561 |
maximum=500,
|
562 |
-
value=
|
563 |
step=10,
|
564 |
info="Limits sampling to the top k most likely tokens."
|
565 |
)
|
@@ -648,6 +754,11 @@ with gr.Blocks(css=css) as demo:
|
|
648 |
status = gr.Textbox(label="Status π’", interactive=False)
|
649 |
vram_status = gr.Textbox(label="VRAM Usage π", interactive=False, value="")
|
650 |
|
|
|
|
|
|
|
|
|
|
|
651 |
rhcp_btn.click(set_red_hot_chili_peppers_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
|
652 |
nirvana_btn.click(set_nirvana_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
|
653 |
pearl_jam_btn.click(set_pearl_jam_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
|
@@ -672,9 +783,14 @@ with gr.Blocks(css=css) as demo:
|
|
672 |
inputs=None,
|
673 |
outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, vram_status]
|
674 |
)
|
|
|
|
|
|
|
|
|
|
|
675 |
|
676 |
# Launch locally without OpenAPI/docs
|
677 |
-
|
678 |
app = demo.launch(
|
679 |
server_name="0.0.0.0",
|
680 |
server_port=9999,
|
@@ -687,5 +803,5 @@ try:
|
|
687 |
fastapi_app.docs_url = None
|
688 |
fastapi_app.redoc_url = None
|
689 |
fastapi_app.openapi_url = None
|
690 |
-
except Exception:
|
691 |
-
|
|
|
12 |
import warnings
|
13 |
import random
|
14 |
import traceback
|
15 |
+
import logging
|
16 |
+
from datetime import datetime
|
17 |
+
from pathlib import Path
|
18 |
+
import mmap
|
19 |
|
20 |
# Suppress warnings for cleaner output
|
21 |
warnings.filterwarnings("ignore")
|
22 |
|
23 |
# Set PYTORCH_CUDA_ALLOC_CONF for CUDA 12
|
24 |
+
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:16"
|
25 |
|
26 |
# Optimize for CUDA 12
|
27 |
torch.backends.cudnn.benchmark = False
|
28 |
torch.backends.cudnn.deterministic = True
|
29 |
|
30 |
+
# Setup logging
|
31 |
+
log_dir = "logs"
|
32 |
+
os.makedirs(log_dir, exist_ok=True)
|
33 |
+
log_file = os.path.join(log_dir, f"musicgen_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
|
34 |
+
logging.basicConfig(
|
35 |
+
level=logging.DEBUG,
|
36 |
+
format="%(asctime)s [%(levelname)s] %(message)s",
|
37 |
+
handlers=[
|
38 |
+
logging.FileHandler(log_file),
|
39 |
+
logging.StreamHandler(sys.stdout)
|
40 |
+
]
|
41 |
+
)
|
42 |
+
logger = logging.getLogger(__name__)
|
43 |
+
|
44 |
# Device setup
|
45 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
46 |
if device != "cuda":
|
47 |
+
logger.error("CUDA is required for GPU rendering. CPU rendering is disabled.")
|
48 |
sys.exit(1)
|
49 |
+
logger.info(f"Using GPU: {torch.cuda.get_device_name(0)} (CUDA 12)")
|
50 |
+
logger.info(f"Using precision: float16 for model, float32 for CPU processing")
|
51 |
|
52 |
# Memory cleanup function
|
53 |
def clean_memory():
|
|
|
55 |
gc.collect()
|
56 |
torch.cuda.ipc_collect()
|
57 |
torch.cuda.synchronize()
|
58 |
+
vram_mb = torch.cuda.memory_allocated() / 1024**2
|
59 |
+
logger.info(f"Memory cleaned: VRAM allocated = {vram_mb:.2f} MB")
|
60 |
+
logger.debug(f"VRAM summary: {torch.cuda.memory_summary()}")
|
61 |
+
return vram_mb
|
62 |
|
63 |
# Pre-run memory cleanup
|
64 |
clean_memory()
|
65 |
|
66 |
# Load MusicGen medium model into VRAM
|
67 |
try:
|
68 |
+
logger.info("Loading MusicGen medium model into VRAM...")
|
69 |
local_model_path = "./models/musicgen-medium"
|
70 |
if not os.path.exists(local_model_path):
|
71 |
+
logger.error(f"Local model path {local_model_path} does not exist.")
|
72 |
+
logger.error("Please download the MusicGen medium model weights and place them in the correct directory.")
|
73 |
sys.exit(1)
|
74 |
musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
|
75 |
musicgen_model.set_generation_params(
|
76 |
duration=30, # Strict 30s max per chunk
|
77 |
two_step_cfg=False
|
78 |
)
|
79 |
+
logger.info("MusicGen medium model loaded successfully.")
|
80 |
except Exception as e:
|
81 |
+
logger.error(f"Failed to load MusicGen model: {e}")
|
82 |
+
logger.error(traceback.format_exc())
|
83 |
sys.exit(1)
|
84 |
|
85 |
# Check disk space
|
|
|
87 |
stat = os.statvfs(path)
|
88 |
free_space = stat.f_bavail * stat.f_frsize / (1024**3) # Free space in GB
|
89 |
if free_space < 1.0:
|
90 |
+
logger.warning(f"Low disk space ({free_space:.2f} GB). Ensure at least 1 GB free.")
|
91 |
return free_space >= 1.0
|
92 |
|
93 |
# Audio processing functions (CPU-based)
|
94 |
+
def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=16000):
|
95 |
+
logger.debug(f"Balancing stereo for segment with sample rate {sample_rate}")
|
96 |
samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
|
97 |
if audio_segment.channels == 2:
|
98 |
stereo_samples = samples.reshape(-1, 2)
|
|
|
114 |
sample_width=audio_segment.sample_width,
|
115 |
channels=2
|
116 |
)
|
117 |
+
logger.debug("Stereo balancing completed")
|
118 |
return balanced_segment
|
119 |
+
logger.debug("Segment is not stereo, returning unchanged")
|
120 |
return audio_segment
|
121 |
|
122 |
def calculate_rms(segment):
|
123 |
samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
|
124 |
+
rms = np.sqrt(np.mean(samples**2))
|
125 |
+
logger.debug(f"Calculated RMS: {rms}")
|
126 |
+
return rms
|
127 |
|
128 |
+
def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=16000):
|
129 |
+
logger.debug(f"Normalizing RMS for segment with target {target_rms_db} dBFS")
|
130 |
target_rms = 10 ** (target_rms_db / 20) * 32767
|
131 |
current_rms = calculate_rms(segment)
|
132 |
if current_rms > 0:
|
133 |
gain_factor = target_rms / current_rms
|
134 |
segment = segment.apply_gain(20 * np.log10(gain_factor))
|
135 |
segment = hard_limit(segment, limit_db=peak_limit_db, sample_rate=sample_rate)
|
136 |
+
logger.debug("RMS normalization completed")
|
137 |
return segment
|
138 |
|
139 |
+
def hard_limit(audio_segment, limit_db=-3.0, sample_rate=16000):
|
140 |
+
logger.debug(f"Applying hard limit at {limit_db} dBFS")
|
141 |
limit = 10 ** (limit_db / 20.0) * 32767
|
142 |
samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
|
143 |
samples = np.clip(samples, -limit, limit).astype(np.int16)
|
144 |
+
limited_segment = AudioSegment(
|
145 |
samples.tobytes(),
|
146 |
frame_rate=sample_rate,
|
147 |
sample_width=audio_segment.sample_width,
|
148 |
channels=audio_segment.channels
|
149 |
)
|
150 |
+
logger.debug("Hard limit applied")
|
151 |
+
return limited_segment
|
152 |
|
153 |
+
def apply_eq(segment, sample_rate=16000):
|
154 |
+
logger.debug(f"Applying EQ with sample rate {sample_rate}")
|
155 |
segment = segment.high_pass_filter(20)
|
156 |
segment = segment.low_pass_filter(20000)
|
157 |
+
logger.debug("EQ applied")
|
158 |
return segment
|
159 |
|
160 |
def apply_fade(segment, fade_in_duration=500, fade_out_duration=500):
|
161 |
+
logger.debug(f"Applying fade: in={fade_in_duration}ms, out={fade_out_duration}ms")
|
162 |
segment = segment.fade_in(fade_in_duration)
|
163 |
segment = segment.fade_out(fade_out_duration)
|
164 |
+
logger.debug("Fade applied")
|
165 |
return segment
|
166 |
|
167 |
# Genre prompt functions
|
|
|
171 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
172 |
bass = f", {bass_style}" if bass_style != "none" else ", groovy basslines"
|
173 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", syncopated guitar riffs"
|
174 |
+
prompt = f"Instrumental funk rock{bass}{guitar}{drum}{synth}, Red Hot Chili Peppers-inspired vibe with dynamic energy and funky breakdowns, {rhythm} at {bpm} BPM."
|
175 |
+
logger.debug(f"Generated RHCP prompt: {prompt}")
|
176 |
+
return prompt
|
177 |
|
178 |
def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
179 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("intense rhythmic steps" if bpm > 120 else "grungy rhythmic pulse")
|
|
|
181 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
182 |
bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
|
183 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", raw distorted guitar riffs"
|
184 |
+
prompt = f"Instrumental grunge{bass}{guitar}{drum}{synth}, Nirvana-inspired angst-filled sound with quiet-loud dynamics, {rhythm} at {bpm} BPM."
|
185 |
+
logger.debug(f"Generated Nirvana prompt: {prompt}")
|
186 |
+
return prompt
|
187 |
|
188 |
def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
189 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "driving rhythmic flow")
|
|
|
191 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
192 |
bass = f", {bass_style}" if bass_style != "none" else ", deep bass"
|
193 |
guitar = f", {guitar_style} guitar leads" if guitar_style != "none" else ", soulful guitar leads"
|
194 |
+
prompt = f"Instrumental grunge{bass}{guitar}{drum}{synth}, Pearl Jam-inspired emotional intensity with soaring choruses, {rhythm} at {bpm} BPM."
|
195 |
+
logger.debug(f"Generated Pearl Jam prompt: {prompt}")
|
196 |
+
return prompt
|
197 |
|
198 |
def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
199 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("heavy rhythmic steps" if bpm > 120 else "sludgy rhythmic groove")
|
|
|
201 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
202 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
203 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", heavy sludgy guitar riffs"
|
204 |
+
prompt = f"Instrumental grunge{bass}{guitar}{drum}{synth}, Soundgarden-inspired dark, psychedelic edge, {rhythm} at {bpm} BPM."
|
205 |
+
logger.debug(f"Generated Soundgarden prompt: {prompt}")
|
206 |
+
return prompt
|
207 |
|
208 |
def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
209 |
styles = ["anthemic", "gritty", "melodic", "fast-paced", "driving"]
|
|
|
217 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
218 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
219 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else f", {style} guitar riffs"
|
220 |
+
prompt = f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Foo Fighters-inspired {mood} vibe with powerful choruses, {rhythm} at {bpm} BPM."
|
221 |
+
logger.debug(f"Generated Foo Fighters prompt: {prompt}")
|
222 |
+
return prompt
|
223 |
|
224 |
def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
225 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("dynamic rhythmic steps" if bpm > 120 else "dreamy rhythmic flow")
|
|
|
227 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
228 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
229 |
guitar = f", {guitar_style} guitar textures" if guitar_style != "none" else ", dreamy guitar textures"
|
230 |
+
prompt = f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Smashing Pumpkins-inspired blend of melancholy and aggression, {rhythm} at {bpm} BPM."
|
231 |
+
logger.debug(f"Generated Smashing Pumpkins prompt: {prompt}")
|
232 |
+
return prompt
|
233 |
|
234 |
def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
235 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("complex rhythmic steps" if bpm > 120 else "intricate rhythmic pulse")
|
|
|
237 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ", atmospheric synths"
|
238 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
239 |
guitar = f", {guitar_style} guitar layers" if guitar_style != "none" else ", intricate guitar layers"
|
240 |
+
prompt = f"Instrumental experimental rock{bass}{guitar}{drum}{synth}, Radiohead-inspired blend of introspective and innovative soundscapes, {rhythm} at {bpm} BPM."
|
241 |
+
logger.debug(f"Generated Radiohead prompt: {prompt}")
|
242 |
+
return prompt
|
243 |
|
244 |
def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
245 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("bluesy rhythmic steps" if bpm > 120 else "steady rhythmic groove")
|
|
|
247 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
248 |
bass = f", {bass_style}" if bass_style != "none" else ", groovy bass"
|
249 |
guitar = f", {guitar_style} electric guitars" if guitar_style != "none" else ", bluesy electric guitars"
|
250 |
+
prompt = f"Instrumental classic rock{bass}{guitar}{drum}{synth}, Led Zeppelin-inspired raw energy with dynamic solos, {rhythm} at {bpm} BPM."
|
251 |
+
logger.debug(f"Generated Classic Rock prompt: {prompt}")
|
252 |
+
return prompt
|
253 |
|
254 |
def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
255 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("quirky rhythmic steps" if bpm > 120 else "energetic rhythmic flow")
|
|
|
257 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
258 |
bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
|
259 |
guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", distorted guitar riffs"
|
260 |
+
prompt = f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Pixies-inspired quirky, energetic vibe, {rhythm} at {bpm} BPM."
|
261 |
+
logger.debug(f"Generated Alternative Rock prompt: {prompt}")
|
262 |
+
return prompt
|
263 |
|
264 |
def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
265 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("sharp rhythmic steps" if bpm > 120 else "moody rhythmic pulse")
|
|
|
267 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
268 |
bass = f", {bass_style}" if bass_style != "none" else ", driving basslines"
|
269 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
|
270 |
+
prompt = f"Instrumental post-punk{bass}{guitar}{drum}{synth}, Joy Division-inspired moody, atmospheric sound with a steady, hypnotic beat, {rhythm} at {bpm} BPM."
|
271 |
+
logger.debug(f"Generated Post-Punk prompt: {prompt}")
|
272 |
+
return prompt
|
273 |
|
274 |
def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
275 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("catchy rhythmic steps" if bpm > 120 else "jangly rhythmic flow")
|
|
|
277 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
278 |
bass = f", {bass_style}" if bass_style != "none" else ""
|
279 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
|
280 |
+
prompt = f"Instrumental indie rock{bass}{guitar}{drum}{synth}, Arctic Monkeys-inspired blend of catchy riffs, {rhythm} at {bpm} BPM."
|
281 |
+
logger.debug(f"Generated Indie Rock prompt: {prompt}")
|
282 |
+
return prompt
|
283 |
|
284 |
def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
285 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("aggressive rhythmic steps" if bpm > 120 else "funky rhythmic groove")
|
|
|
287 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ""
|
288 |
bass = f", {bass_style}" if bass_style != "none" else ", slap bass"
|
289 |
guitar = f", {guitar_style} guitar chords" if guitar_style != "none" else ", funky guitar chords"
|
290 |
+
prompt = f"Instrumental funk rock{bass}{guitar}{drum}{synth}, Rage Against the Machine-inspired mix of groove and aggression, {rhythm} at {bpm} BPM."
|
291 |
+
logger.debug(f"Generated Funk Rock prompt: {prompt}")
|
292 |
+
return prompt
|
293 |
|
294 |
def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
295 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("pulsing rhythmic steps" if bpm > 120 else "deep rhythmic groove")
|
|
|
297 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ", deep pulsing synths with a repetitive, hypnotic pattern"
|
298 |
bass = f", {bass_style}" if bass_style != "none" else ", driving basslines with a consistent, groovy pulse"
|
299 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
|
300 |
+
prompt = f"Instrumental Detroit techno{bass}{guitar}{drum}{synth}, Juan Atkins-inspired rhythmic groove with a steady, repetitive beat, {rhythm} at {bpm} BPM."
|
301 |
+
logger.debug(f"Generated Detroit Techno prompt: {prompt}")
|
302 |
+
return prompt
|
303 |
|
304 |
def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
|
305 |
rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "laid-back rhythmic flow")
|
|
|
307 |
synth = f", {synthesizer} accents" if synthesizer != "none" else ", warm analog synth chords with a repetitive, hypnotic progression"
|
308 |
bass = f", {bass_style}" if bass_style != "none" else ", deep basslines with a consistent, groovy pulse"
|
309 |
guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
|
310 |
+
prompt = f"Instrumental deep house{bass}{guitar}{drum}{synth}, Larry Heard-inspired laid-back groove with a steady, repetitive beat, {rhythm} at {bpm} BPM."
|
311 |
+
logger.debug(f"Generated Deep House prompt: {prompt}")
|
312 |
+
return prompt
|
313 |
|
314 |
# Preset configurations for genres (optimized for medium model)
|
315 |
# One entry per genre preset; every entry carries the same four sampling
# settings (cfg_scale, top_k, top_p, temperature).
# NOTE(review): generate_music appears to copy these values over the
# user-supplied ones when a non-"default" preset is selected - confirm
# against the lookup code.
PRESETS = {
    "default": dict(cfg_scale=2.0, top_k=150, top_p=0.9, temperature=0.8),
    "rock": dict(cfg_scale=2.5, top_k=140, top_p=0.9, temperature=0.9),
    "techno": dict(cfg_scale=1.8, top_k=160, top_p=0.85, temperature=0.7),
    "grunge": dict(cfg_scale=2.0, top_k=150, top_p=0.9, temperature=0.85),
    "indie": dict(cfg_scale=2.2, top_k=145, top_p=0.9, temperature=0.8),
}
|
322 |
|
323 |
+
# Function to get the latest log file
|
324 |
+
def get_latest_log():
    """Return the text of the most recently modified MusicGen log file.

    Searches ``log_dir`` for files matching ``musicgen_log_*.log`` and reads
    the one with the newest modification time.

    Returns:
        str: The file contents on success; ``"No log files found."`` when no
        file matches; ``"Error reading log file: <reason>"`` when listing or
        reading fails. The function reports I/O failures as text instead of
        raising, so it can be used directly as a UI callback.
    """
    pattern = "musicgen_log_*.log"
    try:
        # max() picks the newest file in a single pass instead of sorting
        # every log. It lives inside the try because a file that disappears
        # between glob() and getmtime() raises OSError.
        latest = max(Path(log_dir).glob(pattern), key=os.path.getmtime, default=None)
    except OSError as e:
        logger.error(f"Failed to list log files in {log_dir}: {e}")
        return f"Error reading log file: {e}"

    if latest is None:
        logger.warning("No log files found")
        return "No log files found."

    try:
        # Decode explicitly as UTF-8 and substitute undecodable bytes, so the
        # result does not depend on the process locale and a single bad byte
        # cannot hide the whole log behind an error message.
        with open(latest, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()
    except OSError as e:
        logger.error(f"Failed to read log file {latest}: {e}")
        return f"Error reading log file: {e}"

    logger.info(f"Retrieved latest log file: {latest}")
    return content
|
337 |
+
|
338 |
# Optimized generation function
|
339 |
def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, vram_status: str):
|
340 |
global musicgen_model
|
341 |
if not instrumental_prompt.strip():
|
342 |
+
logger.warning("Empty instrumental prompt provided")
|
343 |
return None, "β οΈ Please enter a valid instrumental prompt!", vram_status
|
344 |
try:
|
345 |
+
logger.info("Starting music generation...")
|
346 |
start_time = time.time()
|
347 |
max_duration = 30 # Strict 30s max per chunk
|
348 |
total_duration = min(max(total_duration, 30), 120) # Clamp between 30s and 120s
|
349 |
+
processing_sample_rate = 16000 # Lower for processing
|
350 |
output_sample_rate = 32000 # MusicGen's native rate
|
351 |
audio_segments = []
|
352 |
+
overlap_duration = 0.3 # 300ms for continuation and crossfade
|
353 |
remaining_duration = total_duration
|
354 |
|
355 |
if preset != "default":
|
|
|
358 |
top_k = preset_params["top_k"]
|
359 |
top_p = preset_params["top_p"]
|
360 |
temperature = preset_params["temperature"]
|
361 |
+
logger.info(f"Applied preset {preset}: cfg_scale={cfg_scale}, top_k={top_k}, top_p={top_p}, temperature={temperature}")
|
362 |
|
363 |
if not check_disk_space():
|
364 |
+
logger.error("Insufficient disk space")
|
365 |
return None, "β οΈ Insufficient disk space. Free up at least 1 GB.", vram_status
|
366 |
|
367 |
+
logger.info(f"Generating audio for {total_duration}s with seed=42")
|
368 |
seed = 42
|
369 |
base_prompt = instrumental_prompt
|
370 |
clean_memory()
|
|
|
373 |
while remaining_duration > 0:
|
374 |
current_duration = min(max_duration, remaining_duration)
|
375 |
generation_duration = current_duration # No overlap in generation
|
376 |
+
chunk_num = len(audio_segments) + 1
|
377 |
+
logger.info(f"Generating chunk {chunk_num} ({current_duration}s, VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB)")
|
378 |
|
|
|
379 |
musicgen_model.set_generation_params(
|
380 |
duration=generation_duration,
|
381 |
use_sampling=True,
|
|
|
391 |
torch.manual_seed(seed)
|
392 |
np.random.seed(seed)
|
393 |
torch.cuda.manual_seed_all(seed)
|
394 |
+
clean_memory() # Pre-generation cleanup
|
395 |
if not audio_segments:
|
396 |
+
logger.debug("Generating first chunk")
|
397 |
audio_segment = musicgen_model.generate([base_prompt], progress=True)[0].cpu()
|
398 |
else:
|
399 |
+
logger.debug("Generating continuation chunk")
|
400 |
prev_segment = audio_segments[-1]
|
401 |
prev_segment = balance_stereo(prev_segment, noise_threshold=-60, sample_rate=processing_sample_rate)
|
402 |
temp_wav_path = f"temp_prev_{int(time.time()*1000)}.wav"
|
403 |
+
logger.debug(f"Exporting previous segment to {temp_wav_path}")
|
404 |
prev_segment.export(temp_wav_path, format="wav")
|
405 |
+
# Use memory-mapped file I/O
|
406 |
+
with open(temp_wav_path, "rb") as f:
|
407 |
+
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
408 |
+
prev_audio, prev_sr = torchaudio.load(temp_wav_path)
|
409 |
+
mmapped_file.close()
|
410 |
if prev_sr != processing_sample_rate:
|
411 |
+
logger.debug(f"Resampling from {prev_sr} to {processing_sample_rate}")
|
412 |
prev_audio = torchaudio.transforms.Resample(prev_sr, processing_sample_rate)(prev_audio)
|
413 |
prev_audio = prev_audio.to(device)
|
414 |
os.remove(temp_wav_path)
|
415 |
+
logger.debug(f"Deleted temporary file {temp_wav_path}")
|
416 |
audio_segment = musicgen_model.generate_continuation(
|
417 |
prompt=prev_audio[:, -int(processing_sample_rate * overlap_duration):],
|
418 |
prompt_sample_rate=processing_sample_rate,
|
|
|
422 |
del prev_audio
|
423 |
clean_memory()
|
424 |
except Exception as e:
|
425 |
+
logger.error(f"Error in chunk {chunk_num} generation: {e}")
|
426 |
+
logger.error(traceback.format_exc())
|
427 |
raise e
|
428 |
|
429 |
+
logger.debug(f"Generated audio segment shape: {audio_segment.shape}")
|
430 |
audio_segment = audio_segment.to(dtype=torch.float32)
|
431 |
if audio_segment.dim() == 1:
|
432 |
+
logger.debug("Converting mono to stereo")
|
433 |
audio_segment = torch.stack([audio_segment, audio_segment], dim=0)
|
434 |
elif audio_segment.dim() == 2 and audio_segment.shape[0] != 2:
|
435 |
+
logger.debug("Adjusting to stereo")
|
436 |
audio_segment = torch.cat([audio_segment, audio_segment], dim=0)
|
437 |
|
438 |
if audio_segment.shape[0] != 2:
|
439 |
+
logger.error(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")
|
440 |
raise ValueError(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")
|
441 |
|
442 |
temp_wav_path = f"temp_audio_{int(time.time()*1000)}.wav"
|
443 |
+
logger.debug(f"Saving audio segment to {temp_wav_path}")
|
444 |
torchaudio.save(temp_wav_path, audio_segment, output_sample_rate, bits_per_sample=16)
|
445 |
segment = AudioSegment.from_wav(temp_wav_path)
|
446 |
os.remove(temp_wav_path)
|
447 |
+
logger.debug(f"Deleted temporary file {temp_wav_path}")
|
448 |
segment = segment - 15
|
449 |
if segment.frame_rate != processing_sample_rate:
|
450 |
+
logger.debug(f"Setting segment sample rate to {processing_sample_rate}")
|
451 |
segment = segment.set_frame_rate(processing_sample_rate)
|
452 |
segment = balance_stereo(segment, noise_threshold=-60, sample_rate=processing_sample_rate)
|
453 |
segment = rms_normalize(segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
|
|
|
456 |
|
457 |
del audio_segment
|
458 |
clean_memory()
|
459 |
+
vram_status = f"VRAM after chunk {chunk_num}: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
|
460 |
time.sleep(0.1)
|
461 |
remaining_duration -= current_duration
|
462 |
|
463 |
+
logger.info("Combining audio chunks...")
|
464 |
final_segment = audio_segments[0][:min(max_duration, total_duration) * 1000]
|
465 |
overlap_ms = int(overlap_duration * 1000)
|
466 |
|
|
|
469 |
current_segment = current_segment[:min(max_duration, total_duration - (i * max_duration)) * 1000]
|
470 |
|
471 |
if overlap_ms > 0 and len(current_segment) > overlap_ms:
|
472 |
+
logger.debug(f"Applying crossfade between chunks {i} and {i+1}")
|
473 |
prev_overlap = final_segment[-overlap_ms:]
|
474 |
curr_overlap = current_segment[:overlap_ms]
|
475 |
num_samples = len(np.array(prev_overlap.get_array_of_samples(), dtype=np.float32)) // 2
|
|
|
489 |
blended_segment = rms_normalize(blended_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
|
490 |
final_segment = final_segment[:-overlap_ms] + blended_segment + current_segment[overlap_ms:]
|
491 |
else:
|
492 |
+
logger.debug(f"Concatenating chunk {i+1} without crossfade")
|
493 |
final_segment += current_segment
|
494 |
|
495 |
final_segment = final_segment[:total_duration * 1000]
|
496 |
+
logger.info("Post-processing final track...")
|
497 |
final_segment = rms_normalize(final_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
|
498 |
final_segment = apply_eq(final_segment, sample_rate=processing_sample_rate)
|
499 |
final_segment = apply_fade(final_segment)
|
|
|
502 |
final_segment = final_segment.set_frame_rate(output_sample_rate) # Upsample to output rate
|
503 |
|
504 |
mp3_path = f"output_adjusted_volume_{int(time.time())}.mp3"
|
505 |
+
logger.info("β οΈ WARNING: Audio is set to safe levels (~ -23 dBFS RMS, -3 dBFS peak). Start playback at LOW volume (10-20%) and adjust gradually.")
|
506 |
+
logger.info("VERIFY: Open the file in Audacity to check for static. RMS should be ~ -23 dBFS, peaks β€ -3 dBFS. Report any static or issues.")
|
507 |
try:
|
508 |
+
logger.debug(f"Exporting final audio to {mp3_path}")
|
509 |
final_segment.export(
|
510 |
mp3_path,
|
511 |
format="mp3",
|
512 |
bitrate="96k",
|
513 |
tags={"title": "GhostAI Instrumental", "artist": "GhostAI"}
|
514 |
)
|
515 |
+
logger.info(f"Final audio saved to {mp3_path}")
|
516 |
except Exception as e:
|
517 |
+
logger.error(f"Error exporting MP3: {e}")
|
518 |
fallback_path = f"fallback_output_{int(time.time())}.mp3"
|
519 |
try:
|
520 |
final_segment.export(fallback_path, format="mp3", bitrate="96k")
|
521 |
+
logger.info(f"Final audio saved to fallback: {fallback_path}")
|
522 |
mp3_path = fallback_path
|
523 |
except Exception as fallback_e:
|
524 |
+
logger.error(f"Failed to save fallback MP3: {fallback_e}")
|
525 |
raise e
|
526 |
|
527 |
vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
|
528 |
+
logger.info(f"Generation completed in {time.time() - start_time:.2f} seconds")
|
529 |
return mp3_path, "β
Done! Generated static-free track with adjusted volume levels.", vram_status
|
530 |
except Exception as e:
|
531 |
+
logger.error(f"Generation failed: {e}")
|
532 |
+
logger.error(traceback.format_exc())
|
533 |
return None, f"β Generation failed: {e}", vram_status
|
534 |
finally:
|
535 |
clean_memory()
|
536 |
|
537 |
# Clear inputs function
|
538 |
def clear_inputs():
    """Return the default value for each of the 15 UI fields being reset.

    NOTE(review): presumably wired to the clear button, whose outputs list is
    (prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
    drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style,
    target_volume, preset, vram_status) - confirm the order against the
    ``.click`` registration.

    Returns:
        tuple: 15 values, in the positional order expected by the caller.
    """
    logger.info("Clearing input fields")

    empty_text = ""
    sampling_defaults = (2.0, 150, 0.9, 0.8)
    duration_and_bpm = (30, 120)
    style_defaults = ("none",) * 5

    return (
        empty_text,
        *sampling_defaults,
        *duration_and_bpm,
        *style_defaults,
        -23.0,
        "default",
        empty_text,
    )
|
541 |
|
542 |
# Custom CSS
|
543 |
css = """
|
|
|
565 |
color: #E0E0E0;
|
566 |
font-size: 12px;
|
567 |
}
|
568 |
+
.input-container, .settings-container, .output-container, .logs-container {
|
569 |
max-width: 1200px;
|
570 |
margin: 20px auto;
|
571 |
padding: 20px;
|
|
|
616 |
"""
|
617 |
|
618 |
# Build Gradio interface
|
619 |
+
logger.info("Building Gradio interface...")
|
620 |
with gr.Blocks(css=css) as demo:
|
621 |
gr.Markdown("""
|
622 |
<div class="header-container">
|
|
|
657 |
label="CFG Scale π―",
|
658 |
minimum=1.0,
|
659 |
maximum=10.0,
|
660 |
+
value=2.0,
|
661 |
step=0.1,
|
662 |
info="Controls how closely the music follows the prompt."
|
663 |
)
|
|
|
665 |
label="Top-K Sampling π’",
|
666 |
minimum=10,
|
667 |
maximum=500,
|
668 |
+
value=150,
|
669 |
step=10,
|
670 |
info="Limits sampling to the top k most likely tokens."
|
671 |
)
|
|
|
754 |
status = gr.Textbox(label="Status π’", interactive=False)
|
755 |
vram_status = gr.Textbox(label="VRAM Usage π", interactive=False, value="")
|
756 |
|
757 |
+
with gr.Column(elem_classes="logs-container"):
|
758 |
+
gr.Markdown("### π Logs")
|
759 |
+
log_output = gr.Textbox(label="Last Log File Contents", lines=20, interactive=False)
|
760 |
+
log_btn = gr.Button("View Last Log π")
|
761 |
+
|
762 |
rhcp_btn.click(set_red_hot_chili_peppers_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
|
763 |
nirvana_btn.click(set_nirvana_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
|
764 |
pearl_jam_btn.click(set_pearl_jam_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
|
|
|
783 |
inputs=None,
|
784 |
outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, vram_status]
|
785 |
)
|
786 |
+
log_btn.click(
|
787 |
+
get_latest_log,
|
788 |
+
inputs=None,
|
789 |
+
outputs=log_output
|
790 |
+
)
|
791 |
|
792 |
# Launch locally without OpenAPI/docs
|
793 |
+
logger.info("Launching Gradio UI at http://localhost:9999...")
|
794 |
app = demo.launch(
|
795 |
server_name="0.0.0.0",
|
796 |
server_port=9999,
|
|
|
803 |
fastapi_app.docs_url = None
|
804 |
fastapi_app.redoc_url = None
|
805 |
fastapi_app.openapi_url = None
|
806 |
+
except Exception as e:
|
807 |
+
logger.error(f"Failed to configure FastAPI app: {e}")
|