Spaces:

TTS-AGI
/

Voice-Clone-Arena

Running

App Files Community

kemuriririn committed on Jul 1

Commit

6deea8f

1 Parent(s): 050b9af

update check cache

Browse files

Files changed (1) hide show

app.py +56 -111

app.py CHANGED Viewed

@@ -6,8 +6,11 @@ from datetime import datetime
 import threading  # Added for locking
 from huggingface_hub.hf_api import RepoFile
 from sqlalchemy import or_  # Added for vote counting query
 import hashlib
 year = datetime.now().year
 month = datetime.now().month
@@ -667,117 +670,50 @@ def generate_tts():
     if len(available_models) < 2:
         return jsonify({"error": "Not enough TTS models available"}), 500
-    selected_models = get_weighted_random_models(available_models, 2, ModelType.TTS)
-    # 尝试从持久化缓存中查找两个模型的音频
-    audio_a_path = find_cached_audio(str(selected_models[0].id), text, reference_audio_path)
-    audio_b_path = find_cached_audio(str(selected_models[1].id), text, reference_audio_path)
-    if audio_a_path and audio_b_path:
-        app.logger.warning(f"Persistent Cache HIT for: '{text[:50]}...'. Using files directly.")
-        session_id = str(uuid.uuid4())
-        app.tts_sessions[session_id] = {
-            "model_a": selected_models[0].id,
-            "model_b": selected_models[1].id,
-            "audio_a": audio_a_path,
-            "audio_b": audio_b_path,
-            "text": text,
-            "created_at": datetime.utcnow(),
-            "expires_at": datetime.utcnow() + timedelta(minutes=30),
-            "voted": False,
-        }
-        return jsonify({
-            "session_id": session_id,
-            "audio_a": f"/api/tts/audio/{session_id}/a",
-            "audio_b": f"/api/tts/audio/{session_id}/b",
-            "expires_in": 1800,
-            "cache_hit": True,  # 可以认为这也是一种缓存命中
-        })
-    # --- 持久化缓存检查结束 ---
-    try:
-        audio_files = []
-        model_ids = []
-        # Function to process a single model (generate directly to TEMP_AUDIO_DIR, not cache subdir)
-        def process_model_on_the_fly(model):
-            app.logger.warning(f"Processing model {model.id} for text: '{text[:30]}...', prompt_md5: {prompt_md5}")
-            app.logger.warning(f"Expected key: {get_tts_cache_key(str(model.id), text, reference_audio_path)}")
-            # 传递 reference_audio_path 给 predict_tts
-            temp_audio_path = predict_tts(text, model.id, reference_audio_path=reference_audio_path,
-                                          user_token=user_token)
-            if not temp_audio_path or not os.path.exists(temp_audio_path):
-                raise ValueError(f"predict_tts failed for model {model.id}")
-            # Create a unique name in the main TEMP_AUDIO_DIR for the session
-            file_uuid = str(uuid.uuid4())
-            dest_path = os.path.join(TEMP_AUDIO_DIR, f"{file_uuid}.wav")
-            shutil.move(temp_audio_path, dest_path)  # Move from predict_tts's temp location
-            return {"model_id": model.id, "audio_path": dest_path}
-        # Use ThreadPoolExecutor to process models concurrently
-        with ThreadPoolExecutor(max_workers=2) as executor:
-            results = list(executor.map(process_model_on_the_fly, selected_models))
-        # Extract results
-        for result in results:
-            model_ids.append(result["model_id"])
-            audio_files.append(result["audio_path"])
-        # Create session
-        session_id = str(uuid.uuid4())
-        app.tts_sessions[session_id] = {
-            "model_a": model_ids[0],
-            "model_b": model_ids[1],
-            "audio_a": audio_files[0],  # Paths are now from TEMP_AUDIO_DIR directly
-            "audio_b": audio_files[1],
-            "text": text,
-            "created_at": datetime.utcnow(),
-            "expires_at": datetime.utcnow() + timedelta(minutes=30),
-            "voted": False,
-        }
-        # 清理临时参考音频文件
-        if reference_audio_path and os.path.exists(reference_audio_path):
-            os.remove(reference_audio_path)
-        # Check if text and prompt are in predefined libraries
-        if text in predefined_texts and prompt_md5 in predefined_prompts.values():
-            with preload_cache_lock:
-                preload_key = get_tts_cache_key(str(model_ids[0]), text, reference_audio_path)
-                preload_path = os.path.join(PRELOAD_CACHE_DIR, f"{preload_key}.wav")
-                shutil.copy(audio_files[0], preload_path)
-                app.logger.info(f"Preloaded cache audio saved: {preload_path}")
-                preload_key = get_tts_cache_key(str(model_ids[1]), text, reference_audio_path)
-                preload_path = os.path.join(PRELOAD_CACHE_DIR, f"{preload_key}.wav")
-                shutil.copy(audio_files[1], preload_path)
-                app.logger.info(f"Preloaded cache audio saved: {preload_path}")
-        # Return audio file paths and session
-        return jsonify(
-            {
-                "session_id": session_id,
-                "audio_a": f"/api/tts/audio/{session_id}/a",
-                "audio_b": f"/api/tts/audio/{session_id}/b",
-                "expires_in": 1800,
-                "cache_hit": False,
-            }
-        )
-    except Exception as e:
-        app.logger.error(f"TTS on-the-fly generation error: {str(e)}", exc_info=True)
-        # Cleanup any files potentially created during the failed attempt
-        if 'results' in locals():
-            for res in results:
-                if 'audio_path' in res and os.path.exists(res['audio_path']):
-                    try:
-                        os.remove(res['audio_path'])
-                    except OSError:
-                        pass
-        # 清理临时参考音频文件
-        if reference_audio_path and os.path.exists(reference_audio_path):
-            os.remove(reference_audio_path)
-        return jsonify({"error": f"Failed to generate TTS:{str(e)}"}), 500
     # --- End Cache Miss ---
@@ -1360,6 +1296,15 @@ def get_tts_cache_key(model_name, text, prompt_audio_path):
     return hashlib.md5(key_str.encode('utf-8')).hexdigest()
 if __name__ == "__main__":
     with app.app_context():
         # Ensure ./instance and ./votes directories exist

 import threading  # Added for locking
 from huggingface_hub.hf_api import RepoFile
+from pydub import AudioSegment, silence
 from sqlalchemy import or_  # Added for vote counting query
 import hashlib
+import numpy as np
+import wave
 year = datetime.now().year
 month = datetime.now().month
     if len(available_models) < 2:
         return jsonify({"error": "Not enough TTS models available"}), 500
+    # 新增：a和b模型都需通过缓存和静音检测
+    candidate_models = available_models.copy()
+    random.shuffle(candidate_models)
+    valid_pairs = []
+    # 枚举所有模型对，找到第一个都通过的组合
+    for i in range(len(candidate_models)):
+        for j in range(len(candidate_models)):
+            if i == j:
+                continue
+            model_a = candidate_models[i]
+            model_b = candidate_models[j]
+            audio_a_path = find_cached_audio(str(model_a.id), text, reference_audio_path)
+            audio_b_path = find_cached_audio(str(model_b.id), text, reference_audio_path)
+            if (audio_a_path and os.path.exists(audio_a_path)
+                and not has_long_silence(audio_a_path)
+                and audio_b_path and os.path.exists(audio_b_path)
+                and not has_long_silence(audio_b_path)):
+                valid_pairs.append((model_a, audio_a_path, model_b, audio_b_path))
+    if not valid_pairs:
+        return jsonify({"error": "所有模型均未通过持久化缓存和静音检测，无法生成音频"}), 500
+    # 随机选一个合格组合
+    model_a, audio_a_path, model_b, audio_b_path = random.choice(valid_pairs)
+    session_id = str(uuid.uuid4())
+    app.tts_sessions[session_id] = {
+        "model_a": model_a.id,
+        "model_b": model_b.id,
+        "audio_a": audio_a_path,
+        "audio_b": audio_b_path,
+        "text": text,
+        "created_at": datetime.utcnow(),
+        "expires_at": datetime.utcnow() + timedelta(minutes=30),
+        "voted": False,
+    }
+    # 清理临时参考音频文件
+    if reference_audio_path and os.path.exists(reference_audio_path):
+        os.remove(reference_audio_path)
+    return jsonify({
+        "session_id": session_id,
+        "audio_a": f"/api/tts/audio/{session_id}/a",
+        "audio_b": f"/api/tts/audio/{session_id}/b",
+        "expires_in": 1800,
+        "cache_hit": True,
+    })
     # --- End Cache Miss ---
     return hashlib.md5(key_str.encode('utf-8')).hexdigest()
+def has_long_silence(audio_path, min_silence_len_ms=10000, silence_thresh_db=-40):  # Return True when detect_silence reports at least one silent range in the file at audio_path.
+    try:
+        audio = AudioSegment.from_file(audio_path)  # load the file via pydub
+        silent_ranges = silence.detect_silence(audio, min_silence_len=min_silence_len_ms, silence_thresh=silence_thresh_db)  # ranges at least min_silence_len_ms long that stay below silence_thresh_db
+        return len(silent_ranges) > 0
+    except Exception as e:  # best-effort: any load/analysis failure is printed, not raised
+        print(f"无法分析音频文件 {audio_path}: {e}")
+        return False  # NOTE(review): a file that cannot be analysed is reported as having no long silence, so it passes the caller's check; confirm this is intended
 if __name__ == "__main__":
     with app.app_context():
         # Ensure ./instance and ./votes directories exist