add rlhf
app.py  CHANGED
@@ -102,6 +102,39 @@ def save_preference_feedback(prompt, audio1_path, audio2_path, preference, addit
     log.info(f"Preference feedback saved: {preference} for prompt: '{prompt[:50]}...'")
     return f"✅ Thanks for your feedback, preference recorded: {preference}"

+def save_preference_feedback_from_flag(input_text, duration, cfg_strength, num_steps, variant,
+                                       audio1_path, audio2_path, prompt_used, preference, comment):
+    """Save preference feedback coming from the Gradio flagging callback."""
+    try:
+        if not preference:
+            print("⚠️ The user did not select a preference")
+            return
+
+        feedback_data = {
+            "timestamp": datetime.now().isoformat(),
+            "prompt": prompt_used or input_text,
+            "audio1_path": audio1_path,
+            "audio2_path": audio2_path,
+            "preference": preference,
+            "additional_comment": comment or "",
+            "generation_params": {
+                "duration": duration,
+                "cfg_strength": cfg_strength,
+                "num_steps": num_steps,
+                "variant": variant
+            }
+        }
+
+        with open(FEEDBACK_FILE, "a", encoding="utf-8") as f:
+            f.write(json.dumps(feedback_data, ensure_ascii=False) + "\n")
+
+        log.info(f"✅ Feedback saved: {preference} - {prompt_used[:50]}...")
+        print(f"✅ User feedback saved to: {FEEDBACK_FILE}")
+
+    except Exception as e:
+        log.error(f"Error while saving feedback: {e}")
+        print(f"❌ Error while saving feedback: {e}")
+

 @spaces.GPU(duration=60)
 @torch.inference_mode()
@@ -159,16 +192,16 @@ def generate_audio_gradio(
         **{sampler_arg_name: sampler},
     )
     save_paths = []
+    safe_prompt = (
+        "".join(c for c in prompt if c.isalnum() or c in (" ", "_"))
+        .rstrip()
+        .replace(" ", "_")[:50]
+    )
+
     for i, audio in enumerate(audios):
         audio = audio.float().cpu()
-
         audio = fade_out(audio, seq_cfg.sampling_rate)

-        safe_prompt = (
-            "".join(c for c in prompt if c.isalnum() or c in (" ", "_"))
-            .rstrip()
-            .replace(" ", "_")[:50]
-        )
         current_time_string = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
         filename = f"{safe_prompt}_{current_time_string}_{i}.flac"
         save_path = OUTPUT_DIR / filename
@@ -198,9 +231,27 @@ gr_interface = gr.Interface(
         gr.Audio(label="🎵 Audio Sample 2"),
         gr.Textbox(label="Prompt Used", interactive=False)
     ],
+    additional_inputs=[
+        gr.Radio(
+            choices=[
+                ("🎵 Audio 1 is better", "audio1"),
+                ("🎵 Audio 2 is better", "audio2"),
+                ("😊 Both are good", "equal"),
+                ("😔 Both are bad", "both_bad")
+            ],
+            label="🤔 Please select the audio you prefer:",
+            value=None
+        ),
+        gr.Textbox(
+            label="💭 Comment (optional)",
+            placeholder="Your specific feedback on the audio quality...",
+            lines=2
+        )
+    ],
     title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
     description="🎯 **RLHF data collection**: 2 audio samples are now generated per prompt! Feedback data is collected to improve the model. Analysis tool: `python analyze_feedback.py`",
-    flagging_mode="
+    flagging_mode="manual",
+    flagging_callback=lambda *args: save_preference_feedback_from_flag(*args),
     examples=[
         ["Generate the festive sounds of a fireworks show: explosions lighting up the sky, crowd cheering, and the faint music playing in the background!! Celebration of the new year!", 10, 3, 1, "meanaudio_s_full"],
         ["Melodic human whistling harmonizing with natural birdsong", 10, 3, 1, "meanaudio_s_full"],
@@ -216,7 +267,7 @@ gr_interface = gr.Interface(
         ["A fork scrapes a plate, water drips slowly into a sink, and the faint hum of a refrigerator lingers in the background", 10, 3, 1, "meanaudio_s_full"]
     ],
     cache_examples="lazy",  # Turn on to cache.
-)
+)

 if __name__ == "__main__":
     ensure_models_downloaded()
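The flagging handler added above appends one JSON object per line to FEEDBACK_FILE. The `analyze_feedback.py` tool mentioned in the description is not part of this diff, so as a rough illustration of consuming that log, here is a minimal sketch assuming the record layout written by save_preference_feedback_from_flag; the default path is only a placeholder for whatever FEEDBACK_FILE points to in app.py.

# Minimal sketch (assumed, not the repository's analyze_feedback.py): tally the
# preferences recorded in the JSONL log written by save_preference_feedback_from_flag.
# The default path below is a placeholder for FEEDBACK_FILE in app.py.
import json
from collections import Counter
from pathlib import Path

def summarize_feedback(path: str = "preference_feedback.jsonl") -> Counter:
    counts = Counter()
    log_file = Path(path)
    if not log_file.exists():
        return counts
    for line in log_file.read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        record = json.loads(line)          # one feedback record per line
        counts[record["preference"]] += 1  # "audio1", "audio2", "equal" or "both_bad"
    return counts

if __name__ == "__main__":
    print(summarize_feedback())

One caveat on the wiring in this commit: Gradio's `flagging_callback` is usually documented as taking a FlaggingCallback-style object (something exposing `setup()` and `flag()`, like the default `gr.CSVLogger`) rather than a bare lambda. If the lambda does not fire as expected, a thin wrapper along the following lines may help; whether `gr.FlaggingCallback` is the right base class, the exact method signatures, and the assumption that `flag_data` arrives in the same order as the parameters of `save_preference_feedback_from_flag` all need to be verified against the installed Gradio version.

# Hedged sketch: a FlaggingCallback-style wrapper around the handler added in this commit.
import gradio as gr

class PreferenceFlagger(gr.FlaggingCallback):
    def setup(self, components, flagging_dir):
        # Called once by gr.Interface with the components whose values will be flagged.
        self.components = components

    def flag(self, flag_data, flag_option=None, username=None):
        # flag_data holds the current values of the registered components, in order
        # (assumed here to match save_preference_feedback_from_flag's parameter order).
        save_preference_feedback_from_flag(*flag_data)
        return 1  # number of saved samples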