update
Browse files
app.py
CHANGED
@@ -111,6 +111,7 @@ def generate_audio_gradio(
|
|
111 |
cfg_strength,
|
112 |
num_steps,
|
113 |
variant,
|
|
|
114 |
):
|
115 |
|
116 |
if duration <= 0 or num_steps <= 0:
|
@@ -146,8 +147,7 @@ def generate_audio_gradio(
|
|
146 |
sampler_arg_name = "fm"
|
147 |
|
148 |
rng = torch.Generator(device=device)
|
149 |
-
|
150 |
-
rng.manual_seed(42)
|
151 |
|
152 |
audios = generation_func(
|
153 |
[prompt]*NUM_SAMPLE,
|
@@ -167,7 +167,7 @@ def generate_audio_gradio(
|
|
167 |
|
168 |
for i, audio in enumerate(audios):
|
169 |
audio = audio.float().cpu()
|
170 |
-
audio = fade_out(audio, seq_cfg.sampling_rate)
|
171 |
|
172 |
current_time_string = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
173 |
filename = f"{safe_prompt}_{current_time_string}_{i}.flac"
|
@@ -188,7 +188,7 @@ output_audio = gr.Audio(label="Generated Audio", type="filepath")
|
|
188 |
denoising_steps = gr.Slider(minimum=1, maximum=25, value=1, step=1, label="Sampling Steps", interactive=True)
|
189 |
cfg_strength = gr.Slider(minimum=1, maximum=10, value=4.5, step=0.5, label="Guidance Scale", interactive=True)
|
190 |
duration = gr.Slider(minimum=1, maximum=30, value=10, step=1, label="Duration", interactive=True)
|
191 |
-
|
192 |
variant = gr.Dropdown(label="Model Variant", choices=list(all_model_cfg.keys()), value='meanaudio_s_full', interactive=True)
|
193 |
|
194 |
|
@@ -214,27 +214,26 @@ description_text = """
|
|
214 |
|
215 |
gr_interface = gr.Interface(
|
216 |
fn=generate_audio_gradio,
|
217 |
-
inputs=[input_text, duration, cfg_strength, denoising_steps, variant],
|
218 |
outputs=[
|
219 |
gr.Audio(label="🎵 Audio Sample", type="filepath"),
|
220 |
gr.Textbox(label="Prompt Used", interactive=False)
|
221 |
],
|
222 |
title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
|
223 |
-
description=
|
224 |
flagging_mode="never",
|
225 |
examples=[
|
226 |
-
["
|
227 |
["Melodic human whistling harmonizing with natural birdsong", 10, 3, 1, "meanaudio_s_full"],
|
228 |
-
["A parade marches through a town square, with drumbeats pounding, children clapping, and a horse neighing amidst the commotion", 10, 3, 1, "meanaudio_s_full"],
|
229 |
-
["Quiet speech and then and airplane flying away", 10, 3, 1, "meanaudio_s_full"],
|
230 |
-
["
|
231 |
-
["A basketball bounces rhythmically on a court, shoes squeak against the floor, and a referee’s whistle cuts through the air", 10, 3, 1, "meanaudio_s_full"],
|
232 |
-
["
|
233 |
-
["A
|
234 |
-
["
|
235 |
-
["
|
236 |
-
[
|
237 |
-
["A fork scrapes a plate, water drips slowly into a sink, and the faint hum of a refrigerator lingers in the background", 10, 3, 1, "meanaudio_s_full"]
|
238 |
],
|
239 |
cache_examples="lazy",
|
240 |
)
|
|
|
111 |
cfg_strength,
|
112 |
num_steps,
|
113 |
variant,
|
114 |
+
seed
|
115 |
):
|
116 |
|
117 |
if duration <= 0 or num_steps <= 0:
|
|
|
147 |
sampler_arg_name = "fm"
|
148 |
|
149 |
rng = torch.Generator(device=device)
|
150 |
+
rng.manual_seed(seed)
|
|
|
151 |
|
152 |
audios = generation_func(
|
153 |
[prompt]*NUM_SAMPLE,
|
|
|
167 |
|
168 |
for i, audio in enumerate(audios):
|
169 |
audio = audio.float().cpu()
|
170 |
+
audio = fade_out(audio, seq_cfg.sampling_rate, fade_ms=100)
|
171 |
|
172 |
current_time_string = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
173 |
filename = f"{safe_prompt}_{current_time_string}_{i}.flac"
|
|
|
188 |
denoising_steps = gr.Slider(minimum=1, maximum=25, value=1, step=1, label="Sampling Steps", interactive=True)
|
189 |
cfg_strength = gr.Slider(minimum=1, maximum=10, value=4.5, step=0.5, label="Guidance Scale", interactive=True)
|
190 |
duration = gr.Slider(minimum=1, maximum=30, value=10, step=1, label="Duration", interactive=True)
|
191 |
+
seed = gr.Slider(minimum=1, maximum=100, value=42, step=1, label="Seed", interactive=True)
|
192 |
variant = gr.Dropdown(label="Model Variant", choices=list(all_model_cfg.keys()), value='meanaudio_s_full', interactive=True)
|
193 |
|
194 |
|
|
|
214 |
|
215 |
gr_interface = gr.Interface(
|
216 |
fn=generate_audio_gradio,
|
217 |
+
inputs=[input_text, duration, cfg_strength, denoising_steps, variant, seed],
|
218 |
outputs=[
|
219 |
gr.Audio(label="🎵 Audio Sample", type="filepath"),
|
220 |
gr.Textbox(label="Prompt Used", interactive=False)
|
221 |
],
|
222 |
title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
|
223 |
+
description=description_text,
|
224 |
flagging_mode="never",
|
225 |
examples=[
|
226 |
+
["Guitar and piano playing a warm music, with a soft and gentle melody, perfect for a romantic evening.", 10, 3, 1, "meanaudio_s_full", 42],
|
227 |
["Melodic human whistling harmonizing with natural birdsong", 10, 3, 1, "meanaudio_s_full", 42],
|
228 |
+
["A parade marches through a town square, with drumbeats pounding, children clapping, and a horse neighing amidst the commotion", 10, 3, 1, "meanaudio_s_full", 42],
|
229 |
+
["Quiet speech and then and airplane flying away", 10, 3, 1, "meanaudio_s_full", 42],
|
230 |
+
["The sound of a steam engine.", 10, 3, 1, "meanaudio_s_full", 42],
|
231 |
+
["A basketball bounces rhythmically on a court, shoes squeak against the floor, and a referee’s whistle cuts through the air", 10, 3, 1, "meanaudio_s_full", 42],
|
232 |
+
["Chopping meat on a wooden table.", 10, 3, 1, "meanaudio_s_full", 42],
|
233 |
+
["A vehicle engine revving then accelerating at a high rate as a metal surface is whipped followed by tires skidding.", 10, 3, 1, "meanaudio_s_full", 42],
|
234 |
+
["Battlefield scene, continuous roar of artillery and gunfire, high fidelity, the sharp crack of bullets, the thundering explosions of bombs, and the screams of wounded soldiers.", 10, 3, 1, "meanaudio_s_full", 42],
|
235 |
+
["Pop music that upbeat, catchy, and easy to listen, high fidelity, with simple melodies, electronic instruments and polished production.", 10, 3, 1, "meanaudio_s_full", 42],
|
236 |
+
["A fast-paced instrumental piece with a classical vibe featuring stringed instruments, evoking an energetic and uplifting mood.", 10, 3, 1, "meanaudio_s_full", 42]
|
|
|
237 |
],
|
238 |
cache_examples="lazy",
|
239 |
)
|