Spaces:

chenxie95
/

MeanAudio

Running on Zero

App Files Files Community

AndreasXi committed 20 days ago

Commit

079604c

1 Parent(s): 19ec831

add rlhf

Browse files

Files changed (1) hide show

app.py +6 -74

app.py CHANGED Viewed

@@ -37,7 +37,7 @@ setup_eval_logging()
 OUTPUT_DIR = Path("./output/gradio")
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
-NUM_SAMPLE = 2
 # 创建RLHF反馈数据目录
 FEEDBACK_DIR = Path("./rlhf")
@@ -175,10 +175,11 @@ def generate_audio_gradio(
         torchaudio.save(str(save_path), audio, seq_cfg.sampling_rate)
         log.info(f"Audio saved to {save_path}")
         save_paths.append(str(save_path))
     if device == "cuda":
         torch.cuda.empty_cache()
-    return save_paths[0], save_paths[1], prompt
 # Gradio input and output components
@@ -194,12 +195,11 @@ gr_interface = gr.Interface(
     fn=generate_audio_gradio,
     inputs=[input_text, duration, cfg_strength, denoising_steps, variant],
     outputs=[
-        gr.Audio(label="🎵 Audio Sample 1", type="filepath"),
-        gr.Audio(label="🎵 Audio Sample 2", type="filepath"),
         gr.Textbox(label="Prompt Used", interactive=False)
     ],
     title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
-    description="🎯 **RLHF数据收集**: 现在生成2个音频样本！生成后请在下方选择偏好并提交。",
     flagging_mode="never",
     examples=[
         ["Generate the festive sounds of a fireworks show: explosions lighting up the sky, crowd cheering, and the faint music playing in the background!! Celebration of the new year!", 10, 3, 1, "meanaudio_s_full"],
@@ -218,79 +218,11 @@ gr_interface = gr.Interface(
     cache_examples="lazy",
 )
-# ==== Preference collection UI (RLHF) ====
-# 允许用户在两段音频之间选择偏好，并补充备注
-with gr.Blocks() as pref_block:
-    gr.Markdown("## 🧠 RLHF 偏好标注")
-    gr.Markdown("生成完成后，请在下方选择您更喜欢的音频（或都不好/差不多），并可附加简短备注。点“提交偏好”即可写入 `./rlhf/user_preferences.jsonl`。")
-    # 这里复用上面 Interface 的输出：我们需要拿到两段音频的文件路径与使用的 prompt
-    # 为了连接这两个“界面”，再放一组可粘连的输入组件：
-    with gr.Row():
-        gen_audio1_path = gr.Textbox(label="Audio 1 路径（自动填充）", interactive=False)
-        gen_audio2_path = gr.Textbox(label="Audio 2 路径（自动填充）", interactive=False)
-    prompt_used = gr.Textbox(label="Prompt（自动填充）", interactive=False)
-    # 偏好选项与备注
-    pref_choice = gr.Radio(
-        ["audio1", "audio2", "equal", "both_bad"],
-        value="audio1",
-        label="你更偏好哪个？",
-        info="equal=差不多; both_bad=都不好"
-    )
-    pref_comment = gr.Textbox(label="可选备注（例如：哪一段更贴合描述、是否有噪声/破音等）", lines=2)
-    submit_btn = gr.Button("✅ 提交偏好")
-    submit_status = gr.Markdown()
-    # 小工具：读取当前标注条目数
-    def _count_feedback():
-        try:
-            with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
-                return sum(1 for _ in f)
-        except FileNotFoundError:
-            return 0
-    refresh_btn = gr.Button("📈 刷新统计")
-    count_box = gr.Markdown()
-    def submit_preference_ui(a1, a2, p, pref, cmt):
-        if not a1 or not a2:
-            return "❗请先在上面的生成器里生成两段音频。"
-        # 写入 jsonl
-        msg = save_preference_feedback(p, a1, a2, pref, cmt)
-        return msg
-    def refresh_count_ui():
-        n = _count_feedback()
-        return f"当前已收集 **{n}** 条偏好样本。"
-    submit_btn.click(
-        fn=submit_preference_ui,
-        inputs=[gen_audio1_path, gen_audio2_path, prompt_used, pref_choice, pref_comment],
-        outputs=submit_status
-    )
-    refresh_btn.click(fn=refresh_count_ui, outputs=count_box)
-# —— 把 Interface 的输出“联动”到偏好区：当用户生成完成后，自动把路径和 prompt 填入偏好区输入框 ——
-def _passthrough(a1, a2, p):
-    # 直接把接口输出透传给下方偏好区
-    return a1, a2, p
-# 用 Events 把 Interface 的输出连到 pref_block 的三个文本框
-gr_interface.submit(
-    fn=_passthrough,
-    inputs=gr_interface.outputs,   # [Audio1(filepath), Audio2(filepath), PromptUsed]
-    outputs=[gen_audio1_path, gen_audio2_path, prompt_used],
-)
 if __name__ == "__main__":
     ensure_models_downloaded()
     load_model_cache()
-    gr_interface.queue(15).launch(share=False, show_api=False)
 # theme = gr.themes.Soft(
 #     primary_hue="blue",

 OUTPUT_DIR = Path("./output/gradio")
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+NUM_SAMPLE = 1
 # 创建RLHF反馈数据目录
 FEEDBACK_DIR = Path("./rlhf")
         torchaudio.save(str(save_path), audio, seq_cfg.sampling_rate)
         log.info(f"Audio saved to {save_path}")
         save_paths.append(str(save_path))
     if device == "cuda":
         torch.cuda.empty_cache()
+    return save_paths[0], prompt
 # Gradio input and output components
     fn=generate_audio_gradio,
     inputs=[input_text, duration, cfg_strength, denoising_steps, variant],
     outputs=[
+        gr.Audio(label="🎵 Audio Sample", type="filepath"),
         gr.Textbox(label="Prompt Used", interactive=False)
     ],
     title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
+    description="",
     flagging_mode="never",
     examples=[
         ["Generate the festive sounds of a fireworks show: explosions lighting up the sky, crowd cheering, and the faint music playing in the background!! Celebration of the new year!", 10, 3, 1, "meanaudio_s_full"],
     cache_examples="lazy",
 )
 if __name__ == "__main__":
     ensure_models_downloaded()
     load_model_cache()
+    gr_interface.queue(15).launch()
 # theme = gr.themes.Soft(
 #     primary_hue="blue",