Spaces:

chenxie95
/

MeanAudio

Running on Zero

App Files Files Community

AndreasXi committed 20 days ago

Commit

19ec831

1 Parent(s): 51fb3d2

add rlhf

Browse files

Files changed (1) hide show

app.py +75 -57

app.py CHANGED Viewed

@@ -102,39 +102,6 @@ def save_preference_feedback(prompt, audio1_path, audio2_path, preference, addit
     log.info(f"Preference feedback saved: {preference} for prompt: '{prompt[:50]}...'")
     return f"✅ Thanks for your feedback, preference recorded: {preference}"
-def save_preference_feedback_from_flag(input_text, duration, cfg_strength, num_steps, variant,
-                                     audio1_path, audio2_path, prompt_used, preference, comment):
-    """处理Gradio flagging回调的反馈保存"""
-    try:
-        if not preference:
-            print("⚠️ 用户没有选择偏好")
-            return
-        feedback_data = {
-            "timestamp": datetime.now().isoformat(),
-            "prompt": prompt_used or input_text,
-            "audio1_path": audio1_path,
-            "audio2_path": audio2_path,
-            "preference": preference,
-            "additional_comment": comment or "",
-            "generation_params": {
-                "duration": duration,
-                "cfg_strength": cfg_strength,
-                "num_steps": num_steps,
-                "variant": variant
-            }
-        }
-        with open(FEEDBACK_FILE, "a", encoding="utf-8") as f:
-            f.write(json.dumps(feedback_data, ensure_ascii=False) + "\n")
-        log.info(f"✅ 反馈已保存: {preference} - {prompt_used[:50]}...")
-        print(f"✅ 用户反馈已保存到: {FEEDBACK_FILE}")
-    except Exception as e:
-        log.error(f"保存反馈时出错: {e}")
-        print(f"❌ 保存反馈时出错: {e}")
 @spaces.GPU(duration=60)
 @torch.inference_mode()
@@ -227,31 +194,13 @@ gr_interface = gr.Interface(
     fn=generate_audio_gradio,
     inputs=[input_text, duration, cfg_strength, denoising_steps, variant],
     outputs=[
-        gr.Audio(label="🎵 Audio Sample 1"),
-        gr.Audio(label="🎵 Audio Sample 2"),
         gr.Textbox(label="Prompt Used", interactive=False)
     ],
-    additional_inputs=[
-        gr.Radio(
-            choices=[
-                ("🎵 Audio 1 更好", "audio1"),
-                ("🎵 Audio 2 更好", "audio2"),
-                ("😊 两者都很好", "equal"),
-                ("😔 两者都不好", "both_bad")
-            ],
-            label="🤔 请选择您更喜欢的音频:",
-            value=None
-        ),
-        gr.Textbox(
-            label="💭 评论 (可选)",
-            placeholder="您对音频质量的具体反馈...",
-            lines=2
-        )
-    ],
     title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
-    description="🎯 **RLHF数据收集**: 现在生成2个音频样本！收集反馈数据用于改进模型。使用分析工具: `python analyze_feedback.py`",
-    flagging_mode="manual",
-    flagging_callback=lambda *args: save_preference_feedback_from_flag(*args),
     examples=[
         ["Generate the festive sounds of a fireworks show: explosions lighting up the sky, crowd cheering, and the faint music playing in the background!! Celebration of the new year!", 10, 3, 1, "meanaudio_s_full"],
         ["Melodic human whistling harmonizing with natural birdsong", 10, 3, 1, "meanaudio_s_full"],
@@ -266,13 +215,82 @@ gr_interface = gr.Interface(
         ['doorbell ding once followed by footsteps gradually getting louder and a door is opened ', 10, 3, 1, "meanaudio_s_full"],
         ["A fork scrapes a plate, water drips slowly into a sink, and the faint hum of a refrigerator lingers in the background", 10, 3, 1, "meanaudio_s_full"]
     ],
-    cache_examples="lazy", # Turn on to cache.
     )
 if __name__ == "__main__":
     ensure_models_downloaded()
     load_model_cache()
-    gr_interface.queue(15).launch()
 # theme = gr.themes.Soft(
 #     primary_hue="blue",

     log.info(f"Preference feedback saved: {preference} for prompt: '{prompt[:50]}...'")
     return f"✅ Thanks for your feedback, preference recorded: {preference}"
 @spaces.GPU(duration=60)
 @torch.inference_mode()
     fn=generate_audio_gradio,
     inputs=[input_text, duration, cfg_strength, denoising_steps, variant],
     outputs=[
+        gr.Audio(label="🎵 Audio Sample 1", type="filepath"),
+        gr.Audio(label="🎵 Audio Sample 2", type="filepath"),
         gr.Textbox(label="Prompt Used", interactive=False)
     ],
     title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
+    description="🎯 **RLHF数据收集**: 现在生成2个音频样本！生成后请在下方选择偏好并提交。",
+    flagging_mode="never",
     examples=[
         ["Generate the festive sounds of a fireworks show: explosions lighting up the sky, crowd cheering, and the faint music playing in the background!! Celebration of the new year!", 10, 3, 1, "meanaudio_s_full"],
         ["Melodic human whistling harmonizing with natural birdsong", 10, 3, 1, "meanaudio_s_full"],
         ['doorbell ding once followed by footsteps gradually getting louder and a door is opened ', 10, 3, 1, "meanaudio_s_full"],
         ["A fork scrapes a plate, water drips slowly into a sink, and the faint hum of a refrigerator lingers in the background", 10, 3, 1, "meanaudio_s_full"]
     ],
+    cache_examples="lazy",
+)
+# ==== Preference collection UI (RLHF) ====
+# 允许用户在两段音频之间选择偏好，并补充备注
+with gr.Blocks() as pref_block:
+    gr.Markdown("## 🧠 RLHF 偏好标注")
+    gr.Markdown("生成完成后，请在下方选择您更喜欢的音频（或都不好/差不多），并可附加简短备注。点“提交偏好”即可写入 `./rlhf/user_preferences.jsonl`。")
+    # 这里复用上面 Interface 的输出：我们需要拿到两段音频的文件路径与使用的 prompt
+    # 为了连接这两个“界面”，再放一组可粘连的输入组件：
+    with gr.Row():
+        gen_audio1_path = gr.Textbox(label="Audio 1 路径（自动填充）", interactive=False)
+        gen_audio2_path = gr.Textbox(label="Audio 2 路径（自动填充）", interactive=False)
+    prompt_used = gr.Textbox(label="Prompt（自动填充）", interactive=False)
+    # 偏好选项与备注
+    pref_choice = gr.Radio(
+        ["audio1", "audio2", "equal", "both_bad"],
+        value="audio1",
+        label="你更偏好哪个？",
+        info="equal=差不多; both_bad=都不好"
+    )
+    pref_comment = gr.Textbox(label="可选备注（例如：哪一段更贴合描述、是否有噪声/破音等）", lines=2)
+    submit_btn = gr.Button("✅ 提交偏好")
+    submit_status = gr.Markdown()
+    # 小工具：读取当前标注条目数
+    def _count_feedback():
+        try:
+            with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
+                return sum(1 for _ in f)
+        except FileNotFoundError:
+            return 0
+    refresh_btn = gr.Button("📈 刷新统计")
+    count_box = gr.Markdown()
+    def submit_preference_ui(a1, a2, p, pref, cmt):
+        if not a1 or not a2:
+            return "❗请先在上面的生成器里生成两段音频。"
+        # 写入 jsonl
+        msg = save_preference_feedback(p, a1, a2, pref, cmt)
+        return msg
+    def refresh_count_ui():
+        n = _count_feedback()
+        return f"当前已收集 **{n}** 条偏好样本。"
+    submit_btn.click(
+        fn=submit_preference_ui,
+        inputs=[gen_audio1_path, gen_audio2_path, prompt_used, pref_choice, pref_comment],
+        outputs=submit_status
     )
+    refresh_btn.click(fn=refresh_count_ui, outputs=count_box)
+# —— 把 Interface 的输出“联动”到偏好区：当用户生成完成后，自动把路径和 prompt 填入偏好区输入框 ——
+def _passthrough(a1, a2, p):
+    # 直接把接口输出透传给下方偏好区
+    return a1, a2, p
+# 用 Events 把 Interface 的输出连到 pref_block 的三个文本框
+gr_interface.submit(
+    fn=_passthrough,
+    inputs=gr_interface.outputs,   # [Audio1(filepath), Audio2(filepath), PromptUsed]
+    outputs=[gen_audio1_path, gen_audio2_path, prompt_used],
+)
 if __name__ == "__main__":  # runs only when the file is executed as a script
     ensure_models_downloaded()
     load_model_cache()
+    gr_interface.queue(15).launch(share=False, show_api=False)  # NOTE(review): 15 is positional; in recent Gradio releases queue()'s first parameter is status_update_rate, not a concurrency/size limit — confirm intent and pass by keyword
 # theme = gr.themes.Soft(
 #     primary_hue="blue",