ghostai1 committed on
Commit
08a444f
·
verified ·
1 Parent(s): 144716b

Create appcud12.py

Browse files

reworking for cuda cdnn 12.1

Files changed (1) hide show
  1. appcud12.py +691 -0
appcud12.py ADDED
@@ -0,0 +1,691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import torchaudio
4
+ import time
5
+ import sys
6
+ import numpy as np
7
+ import gc
8
+ import gradio as gr
9
+ from pydub import AudioSegment
10
+ from audiocraft.models import MusicGen
11
+ from torch.cuda.amp import autocast
12
+ import warnings
13
+ import random
14
+ import traceback
15
+
16
# Suppress warnings for cleaner output
# NOTE: with no category given, this filter hides every warning raised in the
# process, including deprecation notices from the libraries imported above.
warnings.filterwarnings("ignore")

# Set PYTORCH_CUDA_ALLOC_CONF for CUDA 12
# Presumably meant to limit allocator fragmentation by capping the split
# block size at 16 MB -- TODO confirm the value against the target GPU.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:16"

# Optimize for CUDA 12
# cuDNN autotuning is switched off and deterministic kernels are requested.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"
if device != "cuda":
    # There is no CPU fallback: the script exits when no CUDA device is visible.
    print("ERROR: CUDA is required for GPU rendering. CPU rendering is disabled.")
    sys.exit(1)
# The "(CUDA 12)" suffix is a fixed label, not a detected toolkit version.
print(f"Using GPU: {torch.cuda.get_device_name(0)} (CUDA 12)")
print(f"Using precision: float16 for model, float32 for CPU processing")
33
+
34
+ # Memory cleanup function
35
def clean_memory():
    """Free unreachable Python objects and cached CUDA memory, then log usage.

    Prints the amount of VRAM still allocated (in MB) after the cleanup.
    Requires an initialised CUDA device.
    """
    # Collect garbage first: tensors that are only kept alive by reference
    # cycles must be destroyed before the caching allocator can hand their
    # blocks back in empty_cache().
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    # Wait for outstanding kernels so the figure printed below is settled.
    torch.cuda.synchronize()
    print(f"Memory cleaned: VRAM allocated = {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
41
+
42
# Pre-run memory cleanup
clean_memory()

# Load MusicGen medium model into VRAM
# The weights are read from a local directory only; a missing directory or
# any loading error terminates the script with exit status 1.
try:
    print("Loading MusicGen medium model into VRAM...")
    local_model_path = "./models/musicgen-medium"
    if not os.path.exists(local_model_path):
        print(f"ERROR: Local model path {local_model_path} does not exist.")
        print("Please download the MusicGen medium model weights and place them in the correct directory.")
        # SystemExit is not an Exception subclass, so the handler below does
        # not intercept this exit.
        sys.exit(1)
    musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
    # Initial defaults; generate_music() sets the generation parameters again
    # before every chunk.
    musicgen_model.set_generation_params(
        duration=30, # Strict 30s max per chunk
        two_step_cfg=False
    )
    print("MusicGen medium model loaded successfully.")
except Exception as e:
    print(f"ERROR: Failed to load MusicGen model: {e}")
    sys.exit(1)
62
+
63
+ # Check disk space
64
def check_disk_space(path=".", min_free_gb=1.0):
    """Report whether the filesystem holding *path* has enough free space.

    Args:
        path: Any path on the filesystem to inspect.
        min_free_gb: Minimum free space, in GiB, considered sufficient.

    Returns:
        True when at least ``min_free_gb`` GiB are free, otherwise False.
        A warning is printed in the False case.

    Raises:
        OSError: If *path* cannot be inspected.
    """
    # shutil.disk_usage works on Windows as well as POSIX; os.statvfs, which
    # was used before, only exists on POSIX systems.
    import shutil

    free_space = shutil.disk_usage(path).free / (1024**3)  # Free space in GB
    has_room = free_space >= min_free_gb
    if not has_room:
        print(f"WARNING: Low disk space ({free_space:.2f} GB). Ensure at least {min_free_gb:g} GB free.")
    return has_room
70
+
71
+ # Audio processing functions (CPU-based)
72
def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=22050):
    """Equalise the RMS level of the left and right channels.

    Segments that are not two-channel are returned unchanged.

    Args:
        audio_segment: pydub ``AudioSegment``; assumed to hold interleaved
            16-bit samples, because the output is written as int16.
        noise_threshold: Samples whose level ``20*log10(|sample|)`` is not
            above this value are zeroed before the RMS is measured.
            NOTE(review): the level is computed on raw sample values, not
            relative to full scale, so for integer samples the default of
            -60 only masks samples that are already zero.
        sample_rate: Frame rate stamped on the returned segment.

    Returns:
        A new stereo ``AudioSegment``, or the input itself when it is not
        stereo.
    """
    if audio_segment.channels != 2:
        return audio_segment

    samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
    stereo_samples = samples.reshape(-1, 2)
    db_samples = 20 * np.log10(np.abs(stereo_samples) + 1e-10)
    stereo_samples = stereo_samples * (db_samples > noise_threshold)

    # RMS is measured over the non-zero samples of each channel only.
    left_nonzero = stereo_samples[:, 0][stereo_samples[:, 0] != 0]
    right_nonzero = stereo_samples[:, 1][stereo_samples[:, 1] != 0]
    left_rms = np.sqrt(np.mean(left_nonzero**2)) if len(left_nonzero) > 0 else 0
    right_rms = np.sqrt(np.mean(right_nonzero**2)) if len(right_nonzero) > 0 else 0

    if left_rms > 0 and right_rms > 0:
        avg_rms = (left_rms + right_rms) / 2
        stereo_samples[:, 0] = stereo_samples[:, 0] * (avg_rms / left_rms)
        stereo_samples[:, 1] = stereo_samples[:, 1] * (avg_rms / right_rms)

    # The quieter channel is boosted above; clip before the integer cast so
    # peaks saturate instead of wrapping around the int16 range.
    int16_range = np.iinfo(np.int16)
    balanced_samples = np.clip(
        stereo_samples, int16_range.min, int16_range.max
    ).flatten().astype(np.int16)
    return AudioSegment(
        balanced_samples.tobytes(),
        frame_rate=sample_rate,
        sample_width=audio_segment.sample_width,
        channels=2
    )
96
+
97
def calculate_rms(segment):
    """Return the root-mean-square of a segment's raw sample values.

    Args:
        segment: Object exposing ``get_array_of_samples()``.

    Returns:
        The RMS in raw sample units, or 0.0 for a segment with no samples
        (instead of NaN and a NumPy RuntimeWarning).
    """
    samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
    if samples.size == 0:
        return 0.0
    return np.sqrt(np.mean(samples**2))
100
+
101
def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=22050):
    """Bring a segment to a target RMS level, then clamp its peaks.

    Args:
        segment: Audio segment to normalise.
        target_rms_db: Desired RMS in dB relative to 32767.
        peak_limit_db: Peak ceiling forwarded to ``hard_limit``.
        sample_rate: Frame rate forwarded to ``hard_limit``.

    Returns:
        The gain-adjusted, peak-limited segment. A segment whose RMS is not
        positive receives no gain and is only limited.
    """
    wanted_rms = 10 ** (target_rms_db / 20) * 32767
    measured_rms = calculate_rms(segment)
    if measured_rms > 0:
        gain_db = 20 * np.log10(wanted_rms / measured_rms)
        segment = segment.apply_gain(gain_db)
    return hard_limit(segment, limit_db=peak_limit_db, sample_rate=sample_rate)
109
+
110
def hard_limit(audio_segment, limit_db=-3.0, sample_rate=22050):
    """Clamp every sample of a segment to a symmetric peak ceiling.

    Args:
        audio_segment: Segment to limit; the int16 cast below assumes
            16-bit samples.
        limit_db: Ceiling in dB relative to 32767.
        sample_rate: Frame rate stamped on the returned segment.

    Returns:
        A new ``AudioSegment`` with the same sample width and channel count.
    """
    ceiling = 10 ** (limit_db / 20.0) * 32767
    raw = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
    limited = np.clip(raw, -ceiling, ceiling).astype(np.int16)
    segment_kwargs = {
        "frame_rate": sample_rate,
        "sample_width": audio_segment.sample_width,
        "channels": audio_segment.channels,
    }
    return AudioSegment(limited.tobytes(), **segment_kwargs)
120
+
121
def apply_eq(segment, sample_rate=22050):
    """Band-limit a segment with a 20 Hz high-pass and a 20 kHz low-pass.

    Args:
        segment: Segment exposing ``high_pass_filter`` and ``low_pass_filter``.
        sample_rate: Accepted for call-site symmetry; not used here.

    Returns:
        The filtered segment.
    """
    return segment.high_pass_filter(20).low_pass_filter(20000)
125
+
126
def apply_fade(segment, fade_in_duration=500, fade_out_duration=500):
    """Apply a fade-in followed by a fade-out to a segment.

    Args:
        segment: Segment exposing ``fade_in`` and ``fade_out``.
        fade_in_duration: Value passed to ``fade_in``.
        fade_out_duration: Value passed to ``fade_out``.

    Returns:
        The faded segment.
    """
    return segment.fade_in(fade_in_duration).fade_out(fade_out_duration)
130
+
131
+ # Genre prompt functions
132
def set_red_hot_chili_peppers_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Red Hot Chili Peppers-style funk rock prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording) or a phrase that is spliced into the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 picks the more
            energetic default rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as ", <guitar_style> guitar riffs".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "strong rhythmic steps" if bpm > 120 else "groovy rhythmic flow"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ", groovy basslines"
    guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", syncopated guitar riffs"
    return f"Instrumental funk rock{bass}{guitar}{drum}{synth}, Red Hot Chili Peppers-inspired vibe with dynamic energy and funky breakdowns, {rhythm} at {bpm} BPM."
139
+
140
def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Nirvana-style grunge prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording) or a phrase that is spliced into the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 picks the more
            intense default rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as ", <guitar_style> guitar riffs".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "intense rhythmic steps" if bpm > 120 else "grungy rhythmic pulse"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
    guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", raw distorted guitar riffs"
    return f"Instrumental grunge{bass}{guitar}{drum}{synth}, Nirvana-inspired angst-filled sound with quiet-loud dynamics, {rhythm} at {bpm} BPM."
147
+
148
def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Pearl Jam-style grunge prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording) or a phrase that is spliced into the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as ", <guitar_style> guitar leads".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "soulful rhythmic steps" if bpm > 120 else "driving rhythmic flow"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ", deep bass"
    guitar = f", {guitar_style} guitar leads" if guitar_style != "none" else ", soulful guitar leads"
    return f"Instrumental grunge{bass}{guitar}{drum}{synth}, Pearl Jam-inspired emotional intensity with soaring choruses, {rhythm} at {bpm} BPM."
155
+
156
def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Soundgarden-style grunge prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording, which may be empty) or a phrase that is spliced into
    the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given; omitted when "none".
        guitar_style: Guitar style, rendered as ", <guitar_style> guitar riffs".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "heavy rhythmic steps" if bpm > 120 else "sludgy rhythmic groove"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ""
    guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", heavy sludgy guitar riffs"
    return f"Instrumental grunge{bass}{guitar}{drum}{synth}, Soundgarden-inspired dark, psychedelic edge, {rhythm} at {bpm} BPM."
163
+
164
def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Foo Fighters-style alternative rock prompt text.

    Unlike the other genre builders this one is randomised: the mood word
    is always drawn at random, and the guitar adjective is drawn at random
    when ``guitar_style`` is "none".

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given; omitted when "none".
        guitar_style: Guitar style, rendered as ", <guitar_style> guitar riffs".

    Returns:
        The assembled prompt string.
    """
    styles = ["anthemic", "gritty", "melodic", "fast-paced", "driving"]
    moods = ["energetic", "introspective", "rebellious", "uplifting"]
    # A third random draw (a "tempo" word) used to be made here, but its
    # result never reached the prompt, so it has been removed.
    style = random.choice(styles)
    mood = random.choice(moods)
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "powerful rhythmic steps" if bpm > 120 else "catchy rhythmic groove"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ""
    guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else f", {style} guitar riffs"
    return f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Foo Fighters-inspired {mood} vibe with powerful choruses, {rhythm} at {bpm} BPM."
177
+
178
def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Smashing Pumpkins-style alternative rock prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording, which may be empty) or a phrase that is spliced into
    the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given; omitted when "none".
        guitar_style: Guitar style, rendered as
            ", <guitar_style> guitar textures".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "dynamic rhythmic steps" if bpm > 120 else "dreamy rhythmic flow"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ""
    guitar = f", {guitar_style} guitar textures" if guitar_style != "none" else ", dreamy guitar textures"
    return f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Smashing Pumpkins-inspired blend of melancholy and aggression, {rhythm} at {bpm} BPM."
185
+
186
def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Radiohead-style experimental rock prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording, which may be empty) or a phrase that is spliced into
    the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents";
            defaults to ", atmospheric synths".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given; omitted when "none".
        guitar_style: Guitar style, rendered as
            ", <guitar_style> guitar layers".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "complex rhythmic steps" if bpm > 120 else "intricate rhythmic pulse"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ", atmospheric synths"
    bass = f", {bass_style}" if bass_style != "none" else ""
    guitar = f", {guitar_style} guitar layers" if guitar_style != "none" else ", intricate guitar layers"
    return f"Instrumental experimental rock{bass}{guitar}{drum}{synth}, Radiohead-inspired blend of introspective and innovative soundscapes, {rhythm} at {bpm} BPM."
193
+
194
def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Led Zeppelin-style classic rock prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording) or a phrase that is spliced into the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as
            ", <guitar_style> electric guitars".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "bluesy rhythmic steps" if bpm > 120 else "steady rhythmic groove"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ", groovy bass"
    guitar = f", {guitar_style} electric guitars" if guitar_style != "none" else ", bluesy electric guitars"
    return f"Instrumental classic rock{bass}{guitar}{drum}{synth}, Led Zeppelin-inspired raw energy with dynamic solos, {rhythm} at {bpm} BPM."
201
+
202
def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Pixies-style alternative rock prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording) or a phrase that is spliced into the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as ", <guitar_style> guitar riffs".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "quirky rhythmic steps" if bpm > 120 else "energetic rhythmic flow"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
    guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", distorted guitar riffs"
    return f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Pixies-inspired quirky, energetic vibe, {rhythm} at {bpm} BPM."
209
+
210
def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Joy Division-style post-punk prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording) or a phrase that is spliced into the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as ", <guitar_style> guitars".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "sharp rhythmic steps" if bpm > 120 else "moody rhythmic pulse"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ", driving basslines"
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
    return f"Instrumental post-punk{bass}{guitar}{drum}{synth}, Joy Division-inspired moody, atmospheric sound with a steady, hypnotic beat, {rhythm} at {bpm} BPM."
217
+
218
def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Arctic Monkeys-style indie rock prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording, which may be empty) or a phrase that is spliced into
    the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given; omitted when "none".
        guitar_style: Guitar style, rendered as ", <guitar_style> guitars".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "catchy rhythmic steps" if bpm > 120 else "jangly rhythmic flow"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ""
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
    return f"Instrumental indie rock{bass}{guitar}{drum}{synth}, Arctic Monkeys-inspired blend of catchy riffs, {rhythm} at {bpm} BPM."
225
+
226
def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Rage Against the Machine-style funk rock prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording) or a phrase that is spliced into the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as
            ", <guitar_style> guitar chords".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "aggressive rhythmic steps" if bpm > 120 else "funky rhythmic groove"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ""
    synth = f", {synthesizer} accents" if synthesizer != "none" else ""
    bass = f", {bass_style}" if bass_style != "none" else ", slap bass"
    guitar = f", {guitar_style} guitar chords" if guitar_style != "none" else ", funky guitar chords"
    return f"Instrumental funk rock{bass}{guitar}{drum}{synth}, Rage Against the Machine-inspired mix of groove and aggression, {rhythm} at {bpm} BPM."
233
+
234
def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Juan Atkins-style Detroit techno prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording, which may be empty) or a phrase that is spliced into
    the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as ", <guitar_style> guitars";
            omitted when "none".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "pulsing rhythmic steps" if bpm > 120 else "deep rhythmic groove"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ", crisp hi-hats and a steady four-on-the-floor kick drum"
    synth = f", {synthesizer} accents" if synthesizer != "none" else ", deep pulsing synths with a repetitive, hypnotic pattern"
    bass = f", {bass_style}" if bass_style != "none" else ", driving basslines with a consistent, groovy pulse"
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
    return f"Instrumental Detroit techno{bass}{guitar}{drum}{synth}, Juan Atkins-inspired rhythmic groove with a steady, repetitive beat, {rhythm} at {bpm} BPM."
241
+
242
def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
    """Build the Larry Heard-style deep house prompt text.

    Each style argument is either the string ``"none"`` (use this genre's
    built-in wording, which may be empty) or a phrase that is spliced into
    the prompt.

    Args:
        bpm: Tempo written into the prompt; above 120 switches the default
            rhythm wording.
        drum_beat: Drum style, rendered as ", <drum_beat> drums".
        synthesizer: Synth style, rendered as ", <synthesizer> accents".
        rhythmic_steps: Rhythm phrase, rendered as "with <rhythmic_steps>".
        bass_style: Bass phrase, inserted as given.
        guitar_style: Guitar style, rendered as ", <guitar_style> guitars";
            omitted when "none".

    Returns:
        The assembled prompt string.
    """
    if rhythmic_steps != "none":
        # No leading space: the template already supplies ", " before the
        # rhythm clause (previously this produced ",  with ...").
        rhythm = f"with {rhythmic_steps}"
    else:
        rhythm = "soulful rhythmic steps" if bpm > 120 else "laid-back rhythmic flow"
    drum = f", {drum_beat} drums" if drum_beat != "none" else ", steady four-on-the-floor kick drum with soft hi-hats"
    synth = f", {synthesizer} accents" if synthesizer != "none" else ", warm analog synth chords with a repetitive, hypnotic progression"
    bass = f", {bass_style}" if bass_style != "none" else ", deep basslines with a consistent, groovy pulse"
    guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
    return f"Instrumental deep house{bass}{guitar}{drum}{synth}, Larry Heard-inspired laid-back groove with a steady, repetitive beat, {rhythm} at {bpm} BPM."
249
+
250
+ # Preset configurations for genres (optimized for medium model)
251
# Sampling presets keyed by genre name. Each row below is
# (name, cfg_scale, top_k, top_p, temperature); generate_music() substitutes
# these for the slider values whenever a preset other than "default" is chosen.
PRESETS = {
    name: {"cfg_scale": cfg, "top_k": k, "top_p": p, "temperature": temp}
    for name, cfg, k, p, temp in (
        ("default", 2.5, 200, 0.9, 0.8),
        ("rock", 3.0, 180, 0.9, 0.9),
        ("techno", 2.0, 250, 0.85, 0.7),
        ("grunge", 2.5, 200, 0.9, 0.85),
        ("indie", 2.7, 190, 0.9, 0.8),
    )
}
258
+
259
+ # Optimized generation function
260
def _crossfade_overlap(prev_overlap, curr_overlap, frame_rate):
    """Blend the tail of one 16-bit stereo segment into the head of the next.

    The outgoing audio is weighted by a raised-cosine ramp falling from 1 to
    0 and the incoming audio by the complementary ramp, so the two weights
    always sum to 1.
    """
    prev_samples = np.array(prev_overlap.get_array_of_samples(), dtype=np.float32).reshape(-1, 2)
    curr_samples = np.array(curr_overlap.get_array_of_samples(), dtype=np.float32).reshape(-1, 2)
    # Guard against the two slices differing by a frame after resampling.
    num_frames = min(len(prev_samples), len(curr_samples))
    # Half a Hann window: rises monotonically from 0 to 1. (A full window
    # rises and falls again, which silenced both ends of the overlap.)
    fade_in = 0.5 * (1 - np.cos(np.pi * np.arange(num_frames) / max(num_frames - 1, 1)))
    fade_out = 1.0 - fade_in
    blended_samples = prev_samples[:num_frames] * fade_out[:, None] + curr_samples[:num_frames] * fade_in[:, None]
    blended_samples = np.clip(blended_samples, -32768, 32767)
    return AudioSegment(
        blended_samples.astype(np.int16).tobytes(),
        frame_rate=frame_rate,
        sample_width=2,
        channels=2
    )


def _tensor_to_segment(audio, sample_rate):
    """Convert a (channels, samples) tensor to an AudioSegment via a temp WAV."""
    temp_wav_path = f"temp_audio_{int(time.time()*1000)}.wav"
    try:
        torchaudio.save(temp_wav_path, audio, sample_rate, bits_per_sample=16)
        return AudioSegment.from_wav(temp_wav_path)
    finally:
        # Remove the scratch file even when saving or loading fails.
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)


def _segment_to_tensor(segment, sample_rate):
    """Convert an AudioSegment to a tensor at *sample_rate* via a temp WAV."""
    temp_wav_path = f"temp_prev_{int(time.time()*1000)}.wav"
    try:
        # export() hands back the open file object; close it so the file can
        # be removed on every platform.
        segment.export(temp_wav_path, format="wav").close()
        audio, loaded_sr = torchaudio.load(temp_wav_path)
    finally:
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)
    if loaded_sr != sample_rate:
        audio = torchaudio.transforms.Resample(loaded_sr, sample_rate)(audio)
    return audio


def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, vram_status: str):
    """Generate an instrumental track in chunks of up to 30 s and export it as MP3.

    The first chunk is generated from the text prompt alone; every later
    chunk continues from the last 0.5 s of the previous one. Each chunk is
    balanced, normalised and filtered, the chunks are joined with a 0.5 s
    crossfade, and the result is post-processed and written to the current
    directory.

    Args:
        instrumental_prompt: Text description passed to the model.
        cfg_scale: Classifier-free guidance coefficient.
        top_k: Top-k sampling size.
        top_p: Top-p sampling threshold.
        temperature: Sampling temperature.
        total_duration: Requested length in seconds; clamped to 30-120.
        bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
            Accepted so the signature matches the UI inputs; not read here.
        target_volume: Target RMS level in dB for normalisation.
        preset: Key into ``PRESETS``; anything other than "default"
            overrides ``cfg_scale``, ``top_k``, ``top_p`` and ``temperature``.
        vram_status: Previous VRAM status text, returned unchanged on early
            exits.

    Returns:
        A ``(mp3_path, status_message, vram_status)`` tuple. ``mp3_path`` is
        None when the prompt is empty, disk space is low, or generation
        fails; errors are reported through ``status_message`` rather than
        raised.
    """
    global musicgen_model
    if not instrumental_prompt.strip():
        return None, "⚠️ Please enter a valid instrumental prompt!", vram_status
    try:
        print("Starting music generation...")
        max_duration = 30  # Strict 30s max per chunk
        total_duration = min(max(int(total_duration), 30), 120)  # Clamp between 30s and 120s
        processing_sample_rate = 22050  # Lower for processing
        output_sample_rate = 32000  # MusicGen's native rate
        audio_segments = []
        overlap_duration = 0.5  # 500ms for crossfade (post-processing)
        remaining_duration = total_duration

        if preset != "default":
            preset_params = PRESETS.get(preset, PRESETS["default"])
            cfg_scale = preset_params["cfg_scale"]
            top_k = preset_params["top_k"]
            top_p = preset_params["top_p"]
            temperature = preset_params["temperature"]
        # UI sliders may deliver a float; top-k is a count and must be an int.
        top_k = int(top_k)

        if not check_disk_space():
            return None, "⚠️ Insufficient disk space. Free up at least 1 GB.", vram_status

        print(f"Generating audio for {total_duration}s...")
        seed = 42
        base_prompt = instrumental_prompt
        clean_memory()
        vram_status = f"Initial VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"

        while remaining_duration > 0:
            current_duration = min(max_duration, remaining_duration)

            print(f"Generating chunk ({current_duration}s, VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB)...")
            musicgen_model.set_generation_params(
                duration=current_duration,  # No overlap in generation
                use_sampling=True,
                top_k=top_k,
                top_p=top_p,
                temperature=temperature,
                cfg_coef=cfg_scale
            )

            try:
                with torch.no_grad(), autocast(dtype=torch.float16):
                    # Re-seed before every chunk so a run is reproducible.
                    torch.manual_seed(seed)
                    np.random.seed(seed)
                    torch.cuda.manual_seed_all(seed)
                    if not audio_segments:
                        audio_segment = musicgen_model.generate([base_prompt], progress=True)[0].cpu()
                    else:
                        prev_segment = balance_stereo(audio_segments[-1], noise_threshold=-60, sample_rate=processing_sample_rate)
                        prev_audio = _segment_to_tensor(prev_segment, processing_sample_rate).to(device)
                        audio_segment = musicgen_model.generate_continuation(
                            # Only the last overlap_duration seconds seed the continuation.
                            prompt=prev_audio[:, -int(processing_sample_rate * overlap_duration):],
                            prompt_sample_rate=processing_sample_rate,
                            descriptions=[base_prompt],
                            progress=True
                        )[0].cpu()
                        del prev_audio
                        clean_memory()
            except Exception as e:
                print(f"Error in generation: {e}")
                raise

            # Force a (2, samples) float32 layout: duplicate a mono signal
            # into both channels.
            audio_segment = audio_segment.to(dtype=torch.float32)
            if audio_segment.dim() == 1:
                audio_segment = torch.stack([audio_segment, audio_segment], dim=0)
            elif audio_segment.dim() == 2 and audio_segment.shape[0] != 2:
                audio_segment = torch.cat([audio_segment, audio_segment], dim=0)

            if audio_segment.shape[0] != 2:
                raise ValueError(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")

            segment = _tensor_to_segment(audio_segment, output_sample_rate)
            segment = segment - 15
            if segment.frame_rate != processing_sample_rate:
                segment = segment.set_frame_rate(processing_sample_rate)
            segment = balance_stereo(segment, noise_threshold=-60, sample_rate=processing_sample_rate)
            segment = rms_normalize(segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
            segment = apply_eq(segment, sample_rate=processing_sample_rate)
            audio_segments.append(segment)

            del audio_segment
            clean_memory()
            vram_status = f"VRAM after chunk: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
            time.sleep(0.1)
            remaining_duration -= current_duration

        print("Combining audio chunks...")
        final_segment = audio_segments[0][:min(max_duration, total_duration) * 1000]
        overlap_ms = int(overlap_duration * 1000)

        for i, current_segment in enumerate(audio_segments[1:], start=1):
            current_segment = current_segment[:min(max_duration, total_duration - (i * max_duration)) * 1000]

            if overlap_ms > 0 and len(current_segment) > overlap_ms:
                # The blend is not re-normalised on its own: both inputs are
                # already at the target level, and a separate gain on the
                # overlap would create level steps at its edges.
                blended_segment = _crossfade_overlap(
                    final_segment[-overlap_ms:],
                    current_segment[:overlap_ms],
                    processing_sample_rate
                )
                final_segment = final_segment[:-overlap_ms] + blended_segment + current_segment[overlap_ms:]
            else:
                final_segment += current_segment

        final_segment = final_segment[:total_duration * 1000]
        print("Post-processing final track...")
        final_segment = rms_normalize(final_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
        final_segment = apply_eq(final_segment, sample_rate=processing_sample_rate)
        final_segment = apply_fade(final_segment)
        final_segment = balance_stereo(final_segment, noise_threshold=-60, sample_rate=processing_sample_rate)
        final_segment = final_segment - 10
        final_segment = final_segment.set_frame_rate(output_sample_rate)  # Upsample to output rate

        mp3_path = f"output_adjusted_volume_{int(time.time())}.mp3"
        print("⚠️ WARNING: Audio is set to safe levels (~ -23 dBFS RMS, -3 dBFS peak). Start playback at LOW volume (10-20%) and adjust gradually.")
        print("VERIFY: Open the file in Audacity to check for static. RMS should be ~ -23 dBFS, peaks ≤ -3 dBFS. Report any static or issues.")
        try:
            final_segment.export(
                mp3_path,
                format="mp3",
                bitrate="96k",
                tags={"title": "GhostAI Instrumental", "artist": "GhostAI"}
            ).close()
            print(f"Final audio saved to {mp3_path}")
        except Exception as e:
            print(f"Error exporting MP3: {e}")
            # Retry once without tags under a different file name.
            fallback_path = f"fallback_output_{int(time.time())}.mp3"
            try:
                final_segment.export(fallback_path, format="mp3", bitrate="96k").close()
                print(f"Final audio saved to fallback: {fallback_path}")
                mp3_path = fallback_path
            except Exception as fallback_e:
                print(f"Failed to save fallback MP3: {fallback_e}")
                # Surface the first export error, not the fallback's.
                raise e

        vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
        return mp3_path, "✅ Done! Generated static-free track with adjusted volume levels.", vram_status
    except Exception as e:
        error_trace = traceback.format_exc()
        print(f"Generation failed: {e}\n{error_trace}")
        return None, f"❌ Generation failed: {e}", vram_status
    finally:
        clean_memory()
431
+
432
+ # Clear inputs function
433
def clear_inputs():
    """Return the reset values for the form as a 15-item tuple.

    The items follow the parameter order of ``generate_music``: prompt,
    four sampling settings, duration, BPM, five style choices, target
    volume, preset, and a final empty string (presumably the VRAM status
    box; the wiring is outside this view).
    """
    sampling_defaults = (2.5, 200, 0.9, 0.8)  # cfg_scale, top_k, top_p, temperature
    length_and_tempo = (30, 120)
    style_choices = ("none",) * 5
    return ("", *sampling_defaults, *length_and_tempo, *style_choices, -23.0, "default", "")
435
+
436
+ # Custom CSS
437
# Stylesheet string passed to gr.Blocks(css=css) below. It sets the dark page
# theme, styles the header, the input/settings/output containers, textboxes
# and buttons, defines the two "glitch" keyframe animations, and declares the
# Orbitron web font.
css = """
body {
background: linear-gradient(135deg, #0A0A0A 0%, #1C2526 100%);
color: #E0E0E0;
font-family: 'Orbitron', sans-serif;
}
.header-container {
text-align: center;
padding: 10px 20px;
background: rgba(0, 0, 0, 0.9);
border-bottom: 1px solid #00FF9F;
}
#ghost-logo {
font-size: 40px;
animation: glitch-ghost 1.5s infinite;
}
h1 {
color: #A100FF;
font-size: 24px;
animation: glitch-text 2s infinite;
}
p {
color: #E0E0E0;
font-size: 12px;
}
.input-container, .settings-container, .output-container {
max-width: 1200px;
margin: 20px auto;
padding: 20px;
background: rgba(28, 37, 38, 0.8);
border-radius: 10px;
}
.textbox {
background: #1A1A1A;
border: 1px solid #A100FF;
color: #E0E0E0;
}
.genre-buttons {
display: flex;
justify-content: center;
flex-wrap: wrap;
gap: 15px;
}
.genre-btn, button {
background: linear-gradient(45deg, #A100FF, #00FF9F);
border: none;
color: #0A0A0A;
padding: 10px 20px;
border-radius: 5px;
}
.gradio-container {
padding: 20px;
}
.group-container {
margin-bottom: 20px;
padding: 15px;
border: 1px solid #00FF9F;
border-radius: 8px;
}
@keyframes glitch-ghost {
0% { transform: translate(0, 0); opacity: 1; }
20% { transform: translate(-5px, 2px); opacity: 0.8; }
100% { transform: translate(0, 0); opacity: 1; }
}
@keyframes glitch-text {
0% { transform: translate(0, 0); }
20% { transform: translate(-2px, 1px); }
100% { transform: translate(0, 0); }
}
@font-face {
font-family: 'Orbitron';
src: url('https://fonts.gstatic.com/s/orbitron/v29/yMJRMIlzdpvBhQQL_Qq7dy0.woff2') format('woff2');
}
"""
511
+
512
+ # Build Gradio interface
513
+ print("Building Gradio interface... Please wait for the UI to launch.")
514
+ with gr.Blocks(css=css) as demo:
515
+ gr.Markdown("""
516
+ <div class="header-container">
517
+ <div id="ghost-logo">πŸ‘»</div>
518
+ <h1>GhostAI Music Generator 🎹</h1>
519
+ <p>Summon the Sound of the Unknown</p>
520
+ </div>
521
+ """)
522
+
523
+ with gr.Column(elem_classes="input-container"):
524
+ gr.Markdown("### 🎸 Prompt Settings")
525
+ instrumental_prompt = gr.Textbox(
526
+ label="Instrumental Prompt ✍️",
527
+ placeholder="Click a genre button or type your own instrumental prompt",
528
+ lines=4,
529
+ elem_classes="textbox"
530
+ )
531
+ with gr.Row(elem_classes="genre-buttons"):
532
+ rhcp_btn = gr.Button("Red Hot Chili Peppers 🌢️", elem_classes="genre-btn")
533
+ nirvana_btn = gr.Button("Nirvana Grunge 🎸", elem_classes="genre-btn")
534
+ pearl_jam_btn = gr.Button("Pearl Jam Grunge πŸ¦ͺ", elem_classes="genre-btn")
535
+ soundgarden_btn = gr.Button("Soundgarden Grunge πŸŒ‘", elem_classes="genre-btn")
536
+ foo_fighters_btn = gr.Button("Foo Fighters 🀘", elem_classes="genre-btn")
537
+ smashing_pumpkins_btn = gr.Button("Smashing Pumpkins πŸŽƒ", elem_classes="genre-btn")
538
+ radiohead_btn = gr.Button("Radiohead 🧠", elem_classes="genre-btn")
539
+ classic_rock_btn = gr.Button("Classic Rock 🎸", elem_classes="genre-btn")
540
+ alternative_rock_btn = gr.Button("Alternative Rock 🎡", elem_classes="genre-btn")
541
+ post_punk_btn = gr.Button("Post-Punk πŸ–€", elem_classes="genre-btn")
542
+ indie_rock_btn = gr.Button("Indie Rock 🎀", elem_classes="genre-btn")
543
+ funk_rock_btn = gr.Button("Funk Rock πŸ•Ί", elem_classes="genre-btn")
544
+ detroit_techno_btn = gr.Button("Detroit Techno πŸŽ›οΈ", elem_classes="genre-btn")
545
+ deep_house_btn = gr.Button("Deep House 🏠", elem_classes="genre-btn")
546
+
547
+ with gr.Column(elem_classes="settings-container"):
548
+ gr.Markdown("### βš™οΈ API Settings")
549
+ with gr.Group(elem_classes="group-container"):
550
+ cfg_scale = gr.Slider(
551
+ label="CFG Scale 🎯",
552
+ minimum=1.0,
553
+ maximum=10.0,
554
+ value=2.5,
555
+ step=0.1,
556
+ info="Controls how closely the music follows the prompt."
557
+ )
558
+ top_k = gr.Slider(
559
+ label="Top-K Sampling πŸ”’",
560
+ minimum=10,
561
+ maximum=500,
562
+ value=200,
563
+ step=10,
564
+ info="Limits sampling to the top k most likely tokens."
565
+ )
566
+ top_p = gr.Slider(
567
+ label="Top-P Sampling 🎰",
568
+ minimum=0.0,
569
+ maximum=1.0,
570
+ value=0.9,
571
+ step=0.05,
572
+ info="Keeps tokens with cumulative probability above p."
573
+ )
574
+ temperature = gr.Slider(
575
+ label="Temperature πŸ”₯",
576
+ minimum=0.1,
577
+ maximum=2.0,
578
+ value=0.8,
579
+ step=0.1,
580
+ info="Controls randomness; lower values reduce noise."
581
+ )
582
+ total_duration = gr.Dropdown(
583
+ label="Song Length ⏳ (seconds)",
584
+ choices=[30, 60, 90, 120],
585
+ value=30,
586
+ info="Select the total duration of the track."
587
+ )
588
+ bpm = gr.Slider(
589
+ label="Tempo 🎡 (BPM)",
590
+ minimum=60,
591
+ maximum=180,
592
+ value=120,
593
+ step=1,
594
+ info="Beats per minute to set the track's tempo."
595
+ )
596
+ drum_beat = gr.Dropdown(
597
+ label="Drum Beat πŸ₯",
598
+ choices=["none", "standard rock", "funk groove", "techno kick", "jazz swing"],
599
+ value="none",
600
+ info="Select a drum beat style to influence the rhythm."
601
+ )
602
+ synthesizer = gr.Dropdown(
603
+ label="Synthesizer 🎹",
604
+ choices=["none", "analog synth", "digital pad", "arpeggiated synth"],
605
+ value="none",
606
+ info="Select a synthesizer style for electronic accents."
607
+ )
608
+ rhythmic_steps = gr.Dropdown(
609
+ label="Rhythmic Steps πŸ‘£",
610
+ choices=["none", "syncopated steps", "steady steps", "complex steps"],
611
+ value="none",
612
+ info="Select a rhythmic step style to enhance the beat."
613
+ )
614
+ bass_style = gr.Dropdown(
615
+ label="Bass Style 🎸",
616
+ choices=["none", "slap bass", "deep bass", "melodic bass"],
617
+ value="none",
618
+ info="Select a bass style to shape the low end."
619
+ )
620
+ guitar_style = gr.Dropdown(
621
+ label="Guitar Style 🎸",
622
+ choices=["none", "distorted", "clean", "jangle"],
623
+ value="none",
624
+ info="Select a guitar style to define the riffs."
625
+ )
626
+ target_volume = gr.Slider(
627
+ label="Target Volume 🎚️ (dBFS RMS)",
628
+ minimum=-30.0,
629
+ maximum=-20.0,
630
+ value=-23.0,
631
+ step=1.0,
632
+ info="Adjust output loudness (-23 dBFS is standard, -20 dBFS is louder, -30 dBFS is quieter)."
633
+ )
634
+ preset = gr.Dropdown(
635
+ label="Preset Configuration πŸŽ›οΈ",
636
+ choices=["default", "rock", "techno", "grunge", "indie"],
637
+ value="default",
638
+ info="Select a preset optimized for specific genres."
639
+ )
640
+
641
+ with gr.Row(elem_classes="action-buttons"):
642
+ gen_btn = gr.Button("Generate Music πŸš€")
643
+ clr_btn = gr.Button("Clear Inputs 🧹")
644
+
645
+ with gr.Column(elem_classes="output-container"):
646
+ gr.Markdown("### 🎧 Output")
647
+ out_audio = gr.Audio(label="Generated Instrumental Track 🎡", type="filepath")
648
+ status = gr.Textbox(label="Status πŸ“’", interactive=False)
649
+ vram_status = gr.Textbox(label="VRAM Usage πŸ“Š", interactive=False, value="")
650
+
651
# ---- Event wiring ---------------------------------------------------------
# Controls handed to every genre prompt builder, in the positional order the
# builders are called with.
_genre_prompt_inputs = [bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style]

# (button, prompt builder) pairs. The order is kept exactly as the handlers
# were originally registered, one per genre button.
_genre_bindings = (
    (rhcp_btn, set_red_hot_chili_peppers_prompt),
    (nirvana_btn, set_nirvana_grunge_prompt),
    (pearl_jam_btn, set_pearl_jam_grunge_prompt),
    (soundgarden_btn, set_soundgarden_grunge_prompt),
    (foo_fighters_btn, set_foo_fighters_prompt),
    (smashing_pumpkins_btn, set_smashing_pumpkins_prompt),
    (radiohead_btn, set_radiohead_prompt),
    (classic_rock_btn, set_classic_rock_prompt),
    (alternative_rock_btn, set_alternative_rock_prompt),
    (post_punk_btn, set_post_punk_prompt),
    (indie_rock_btn, set_indie_rock_prompt),
    (funk_rock_btn, set_funk_rock_prompt),
    (detroit_techno_btn, set_detroit_techno_prompt),
    (deep_house_btn, set_deep_house_prompt),
)

# Every genre button writes its builder's result into `instrumental_prompt`.
for _genre_button, _prompt_builder in _genre_bindings:
    _genre_button.click(
        _prompt_builder,
        inputs=list(_genre_prompt_inputs),  # fresh list per handler, as before
        outputs=instrumental_prompt,
    )

# The full set of form components: the prompt, the sampling controls, the
# genre-builder inputs (same order as above), then volume, preset and the
# VRAM status box. Used both as the generate inputs and as the clear outputs.
_form_components = [
    instrumental_prompt,
    cfg_scale,
    top_k,
    top_p,
    temperature,
    total_duration,
    *_genre_prompt_inputs,
    target_volume,
    preset,
    vram_status,
]

# "Generate" reads the whole form (including the current VRAM status text)
# and updates the audio player, the status box and the VRAM status box.
gen_btn.click(
    generate_music,
    inputs=list(_form_components),
    outputs=[out_audio, status, vram_status],
)

# "Clear" takes no inputs and writes a value back to every form component.
clr_btn.click(
    clear_inputs,
    inputs=None,
    outputs=list(_form_components),
)
675
+
676
# Launch locally with the FastAPI OpenAPI/docs endpoints disabled.
#
# The docs options have to be supplied when the server app is created:
#   * FastAPI registers /docs, /redoc and /openapi.json while the app object
#     is being constructed, so assigning docs_url / redoc_url / openapi_url on
#     an existing app does not remove anything.
#   * demo.launch() blocks this thread while the server is running (unless
#     prevent_thread_lock=True), so code placed after it only runs once the
#     server has already stopped.
# Gradio forwards `app_kwargs` to the underlying FastAPI constructor, which is
# the supported way to set these options, so no post-launch patching of
# private attributes is needed.
print("Launching Gradio UI at http://localhost:9999... Do not interrupt until the UI is fully loaded.")
app = demo.launch(
    server_name="0.0.0.0",  # bind on all interfaces; reachable locally via localhost
    server_port=9999,
    share=False,  # no public Gradio share link
    inbrowser=False,  # do not open a browser tab automatically
    show_error=True,  # surface handler exceptions in the UI
    app_kwargs={"docs_url": None, "redoc_url": None, "openapi_url": None},
)