Spaces:

chenxie95
/

MeanAudio

Running on Zero

App Files Files Community

AndreasXi committed 10 days ago

Commit

629a90b

1 Parent(s): 2b7760c

add rlhf

Browse files

Files changed (2) hide show

app.py +41 -16
feedback_collector.py +127 -0

app.py CHANGED Viewed

@@ -23,6 +23,7 @@ from meanaudio.model.utils.features_utils import FeaturesUtils
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
 import gc
 from datetime import datetime
 from huggingface_hub import snapshot_download
 import numpy as np
@@ -38,6 +39,11 @@ OUTPUT_DIR = Path("./output/gradio")
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 NUM_SAMPLE = 2
 # Global model cache to avoid reloading
 MODEL_CACHE = {}
 FEATURE_UTILS_CACHE = {}
@@ -80,6 +86,22 @@ def load_model_cache():
             ).to(device, torch.bfloat16).eval()
             FEATURE_UTILS_CACHE['default'] = feature_utils
 @spaces.GPU(duration=60)
 @torch.inference_mode()
@@ -97,7 +119,7 @@ def generate_audio_gradio(
         raise ValueError(f"Unknown model variant: {variant}. Available: {list(all_model_cfg.keys())}")
     net, feature_utils = MODEL_CACHE[variant], FEATURE_UTILS_CACHE['default']
     model = all_model_cfg[variant]
     seq_cfg = model.seq_cfg
     seq_cfg.duration = duration
@@ -142,21 +164,21 @@ def generate_audio_gradio(
         audio = fade_out(audio, seq_cfg.sampling_rate)
-        safe_prompt = (
-            "".join(c for c in prompt if c.isalnum() or c in (" ", "_"))
-            .rstrip()
-            .replace(" ", "_")[:50]
-        )
-        current_time_string = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
-        filename = f"{safe_prompt}_{current_time_string}_{i}.flac"
-        save_path = OUTPUT_DIR / filename
-        torchaudio.save(str(save_path), audio, seq_cfg.sampling_rate)
-        log.info(f"Audio saved to {save_path}")
-        save_paths.append(str(save_path))
     if device == "cuda":
         torch.cuda.empty_cache()
-    return save_paths
 # Gradio input and output components
@@ -171,9 +193,13 @@ variant = gr.Dropdown(label="Model Variant", choices=list(all_model_cfg.keys()),
 gr_interface = gr.Interface(
     fn=generate_audio_gradio,
     inputs=[input_text, duration, cfg_strength, denoising_steps, variant],
-    outputs=["audio", "audio"],
     title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
-    description="",
     flagging_mode="never",
     examples=[
         ["Generate the festive sounds of a fireworks show: explosions lighting up the sky, crowd cheering, and the faint music playing in the background!! Celebration of the new year!", 10, 3, 1, "meanaudio_s_full"],
@@ -193,7 +219,6 @@ gr_interface = gr.Interface(
 )
 if __name__ == "__main__":
     ensure_models_downloaded()
     load_model_cache()
     gr_interface.queue(15).launch()

 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
 import gc
+import json
 from datetime import datetime
 from huggingface_hub import snapshot_download
 import numpy as np
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 NUM_SAMPLE = 2
+# Directory and JSONL file used to collect RLHF preference feedback.
+# The directory is created when this module runs, if it does not exist yet;
+# save_preference_feedback appends one JSON line per record to FEEDBACK_FILE.
+FEEDBACK_DIR = Path("./rlhf")
+FEEDBACK_DIR.mkdir(exist_ok=True)
+FEEDBACK_FILE = FEEDBACK_DIR / "user_preferences.jsonl"
 # Global model cache to avoid reloading
 MODEL_CACHE = {}
 FEATURE_UTILS_CACHE = {}
             ).to(device, torch.bfloat16).eval()
             FEATURE_UTILS_CACHE['default'] = feature_utils
def save_preference_feedback(prompt, audio1_path, audio2_path, preference, additional_comment=""):
    """Append one preference record to the feedback JSONL file.

    Args:
        prompt: Prompt text the two audio samples were generated from.
        audio1_path: Path of the first audio sample, stored as given.
        audio2_path: Path of the second audio sample, stored as given.
        preference: The user's choice, e.g. "audio1", "audio2", "equal"
            or "both_bad".
        additional_comment: Optional free-text remark.

    Returns:
        A confirmation message that echoes the recorded preference.
    """
    # Keyword order fixes the key order of the serialized JSON object.
    record = dict(
        timestamp=datetime.now().isoformat(),
        prompt=prompt,
        audio1_path=audio1_path,
        audio2_path=audio2_path,
        preference=preference,
        additional_comment=additional_comment,
    )

    # One JSON object per line; ensure_ascii=False keeps non-ASCII text readable.
    with open(FEEDBACK_FILE, mode="a", encoding="utf-8") as sink:
        payload = json.dumps(record, ensure_ascii=False)
        sink.write(f"{payload}\n")

    log.info(f"Preference feedback saved: {preference} for prompt: '{prompt[:50]}...'")
    return f"✅ Thanks for your feedback, preference recorded: {preference}"
 @spaces.GPU(duration=60)
 @torch.inference_mode()
         raise ValueError(f"Unknown model variant: {variant}. Available: {list(all_model_cfg.keys())}")
     net, feature_utils = MODEL_CACHE[variant], FEATURE_UTILS_CACHE['default']
     model = all_model_cfg[variant]
     seq_cfg = model.seq_cfg
     seq_cfg.duration = duration
         audio = fade_out(audio, seq_cfg.sampling_rate)
+    safe_prompt = (
+        "".join(c for c in prompt if c.isalnum() or c in (" ", "_"))
+        .rstrip()
+        .replace(" ", "_")[:50]
+    )
+    current_time_string = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+    filename = f"{safe_prompt}_{current_time_string}_{i}.flac"
+    save_path = OUTPUT_DIR / filename
+    torchaudio.save(str(save_path), audio, seq_cfg.sampling_rate)
+    log.info(f"Audio saved to {save_path}")
+    save_paths.append(str(save_path))
     if device == "cuda":
         torch.cuda.empty_cache()
+    return save_paths[0], save_paths[1], prompt
 # Gradio input and output components
 gr_interface = gr.Interface(
     fn=generate_audio_gradio,
     inputs=[input_text, duration, cfg_strength, denoising_steps, variant],
+    outputs=[
+        gr.Audio(label="🎵 Audio Sample 1"),
+        gr.Audio(label="🎵 Audio Sample 2"),
+        gr.Textbox(label="Prompt Used", interactive=False)
+    ],
     title="MeanAudio: Fast and Faithful Text-to-Audio Generation with Mean Flows",
+    description="🎯 **RLHF数据收集**: 现在生成2个音频样本！收集反馈数据用于改进模型。使用分析工具: `python analyze_feedback.py`",
     flagging_mode="never",
     examples=[
         ["Generate the festive sounds of a fireworks show: explosions lighting up the sky, crowd cheering, and the faint music playing in the background!! Celebration of the new year!", 10, 3, 1, "meanaudio_s_full"],
 )
 if __name__ == "__main__":
     ensure_models_downloaded()
     load_model_cache()
     gr_interface.queue(15).launch()

feedback_collector.py ADDED Viewed

	@@ -0,0 +1,127 @@

+#!/usr/bin/env python3
+"""
+简单的反馈收集工具
+在MeanAudio生成音频后，运行此脚本收集用户偏好
+"""
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+import gradio as gr
+# Location of the collected feedback: a directory holding one JSONL file.
+# The directory is created when this module runs, if it is not already there;
+# save_feedback appends one JSON line per record to FEEDBACK_FILE.
+FEEDBACK_DIR = Path("./rlhf_feedback")
+FEEDBACK_DIR.mkdir(exist_ok=True)
+FEEDBACK_FILE = FEEDBACK_DIR / "user_preferences.jsonl"
def save_feedback(audio1_path, audio2_path, prompt, preference, comment=""):
    """Validate and append one preference record to the feedback JSONL file.

    Args:
        audio1_path: Path of the first audio sample, stored as given.
        audio2_path: Path of the second audio sample, stored as given.
        prompt: Prompt text stored with the record.
        preference: One of "audio1", "audio2", "equal" or "both_bad".
        comment: Optional free-text remark, stored under "additional_comment".

    Returns:
        A status message for the UI: a confirmation when the record was
        written, or an error message when ``preference`` is missing or
        unknown (in which case nothing is written).
    """
    # The preference radio has no default selection, so a submit without a
    # choice delivers None here. Such a record carries no signal; reject it
    # instead of persisting it and reporting success.
    valid_preferences = ("audio1", "audio2", "equal", "both_bad")
    if preference not in valid_preferences:
        return "❌ 请先选择您的偏好，然后再提交反馈"

    feedback_data = {
        "timestamp": datetime.now().isoformat(),
        "prompt": prompt,
        "audio1_path": audio1_path,
        "audio2_path": audio2_path,
        "preference": preference,
        "additional_comment": comment,
    }

    # One JSON object per line; ensure_ascii=False keeps non-ASCII text readable.
    with open(FEEDBACK_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(feedback_data, ensure_ascii=False) + "\n")

    return f"✅ 反馈已保存！偏好: {preference}"
def create_feedback_interface():
    """Build the Gradio Blocks UI used to collect audio preference feedback.

    The page contains text inputs for the prompt and two audio file paths,
    two audio players filled in by the load button, a preference radio, an
    optional comment box, and a submit button wired to ``save_feedback``.

    Returns:
        The constructed ``gr.Blocks`` instance; the caller launches it.
    """
    with gr.Blocks(title="MeanAudio 反馈收集器") as demo:
        gr.Markdown("# MeanAudio 反馈收集器")
        gr.Markdown("*请输入生成的音频文件路径和提示词，然后选择您的偏好*")

        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="提示词",
                    placeholder="输入用于生成音频的提示词..."
                )
                audio1_path = gr.Textbox(
                    label="音频文件1路径",
                    placeholder="./output/gradio/prompt_timestamp_0.flac"
                )
                audio2_path = gr.Textbox(
                    label="音频文件2路径",
                    placeholder="./output/gradio/prompt_timestamp_1.flac"
                )

            with gr.Column():
                # Players for the two audio files
                audio1_player = gr.Audio(label="音频1")
                audio2_player = gr.Audio(label="音频2")
                load_btn = gr.Button("🔄 加载音频文件")

        # Feedback section
        gr.Markdown("---")
        gr.Markdown("### 请选择您的偏好")

        preference = gr.Radio(
            choices=[
                ("音频1更好", "audio1"),
                ("音频2更好", "audio2"),
                ("两者质量相等", "equal"),
                ("两者都不好", "both_bad")
            ],
            label="哪个音频更好？"
        )

        comment = gr.Textbox(
            label="额外评论 (可选)",
            placeholder="关于音频质量的具体反馈...",
            lines=3
        )

        submit_btn = gr.Button("📝 提交反馈", variant="primary")
        result = gr.Textbox(label="结果", interactive=False)

        # Event handlers
        def load_audio_files(path1, path2):
            """Return each path if it names an existing regular file, else None."""

            def playable(path):
                # isfile rather than exists: a directory also "exists" but
                # cannot be handed to an audio player. The truthiness guard
                # keeps empty/None input from reaching os.path.
                return path if path and os.path.isfile(path) else None

            return playable(path1), playable(path2)

        load_btn.click(
            fn=load_audio_files,
            inputs=[audio1_path, audio2_path],
            outputs=[audio1_player, audio2_player]
        )

        submit_btn.click(
            fn=save_feedback,
            inputs=[audio1_path, audio2_path, prompt_input, preference, comment],
            outputs=[result]
        )

        # Usage instructions
        gr.Markdown("---")
        gr.Markdown("""
        ### 使用说明
        1. 先运行 MeanAudio 生成两个音频文件
        2. 将生成的音频文件路径复制到上面的输入框中
        3. 点击"加载音频文件"来播放音频
        4. 选择您的偏好并提交反馈
        5. 反馈数据将保存到 `./rlhf_feedback/user_preferences.jsonl`
        6. 使用 `python analyze_feedback.py` 分析收集的反馈数据
        """)

    return demo
# Script entry point: build the UI, announce where feedback is stored, then
# serve it on the loopback interface only.
if __name__ == "__main__":
    demo = create_feedback_interface()
    print("启动反馈收集界面...", f"反馈数据将保存到: {FEEDBACK_FILE}", sep="\n")
    launch_options = {"server_name": "127.0.0.1", "server_port": 7861}
    demo.launch(**launch_options)