Spaces:

Seicas
/

VoiceToWrite

Sleeping

App Files Community

Seicas committed on Jun 17

Commit

14e4ceb

verified ·

1 Parent(s): f631cbd

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -52

app.py CHANGED Viewed

@@ -17,12 +17,13 @@ if not HF_TOKEN:
     print("Warning: HF_TOKEN not set!")
 def load_spacy_model():
     try:
-        return spacy.load("tr_core_news_md")
     except OSError:
-        print("Türkçe SpaCy modeli indiriliyor...")
-        subprocess.run(["python", "-m", "spacy", "download", "tr_core_news_md"], check=True)
-        return spacy.load("tr_core_news_md")
 # SpaCy modelini yükle
 nlp = load_spacy_model()
@@ -55,61 +56,36 @@ css = """
 .tips {background: #e7f5ff; padding: 15px; border-radius: 5px; margin-top: 20px;}
 """
-async def process_audio(audio_file, diarize=True, enhance=True, anonymize=True, progress=gr.Progress()):
     try:
-        if audio_file is None:
-            return {"error": "Lütfen bir ses dosyası yükleyin."}, None
-        progress(0, desc="Ses dosyası hazırlanıyor...")
-        # Ses dosyasını temizle
-        if enhance:
-            progress(0.1, desc="Ses iyileştiriliyor...")
-            audio_file = clean_audio(audio_file)
-        # Transkripsiyon yap
-        progress(0.3, desc="Ses metne dönüştürülüyor...")
-        result = transcribe_file(
-            audio_file,
-            language="tr",
-            model_name=settings.ASR_MODEL
-        )
-        # Konuşmacı ayrımı
-        if diarize:
-            progress(0.6, desc="Konuşmacılar ayrıştırılıyor...")
-            diarization_result = diarize_segments(result["segments"])
-            result["diarization"] = diarization_result
-        # Kişisel verileri anonimleştir
-        if anonymize:
-            progress(0.8, desc="Kişisel veriler anonimleştiriliyor...")
-            privacy_processor = MedicalPrivacyProcessor()
-            result["text"] = privacy_processor.anonymize_text(result["text"])
-            result["anonymized"] = True
-        # Sonucu formatla
-        progress(0.9, desc="Sonuçlar hazırlanıyor...")
-        formatted_text = ""
-        if diarize and "diarization" in result:
-            for segment in result["diarization"]:
-                speaker = segment["speaker"]
-                text = segment["text"]
-                start = segment["start"]
-                end = segment["end"]
-                formatted_text += f"[{speaker}] ({start:.1f}s - {end:.1f}s): {text}\n\n"
-        else:
-            formatted_text = result["text"]
-        if result.get("anonymized"):
-            formatted_text += "\n🔒 Kişisel veriler anonimleştirildi."
-        progress(1.0, desc="Tamamlandı!")
-        return result, formatted_text
     except Exception as e:
-        print(f"Error in process_audio: {str(e)}")
-        return {"error": f"İşlem sırasında hata: {str(e)}"}, None
 # Ana arayüz
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=css) as demo:

     print("Warning: HF_TOKEN not set!")
 def load_spacy_model():
+    """Load SpaCy model with fallback to small model"""
     try:
+        return spacy.load(settings.SPACY_MODEL)
     except OSError:
+        print(f"Downloading {settings.SPACY_MODEL}...")
+        subprocess.run(["python", "-m", "spacy", "download", settings.SPACY_MODEL], check=True)
+        return spacy.load(settings.SPACY_MODEL)
 # SpaCy modelini yükle
 nlp = load_spacy_model()
 .tips {background: #e7f5ff; padding: 15px; border-radius: 5px; margin-top: 20px;}
 """
+def process_audio(audio_file, is_pediatrics=True):
+    """Process audio with improved error handling"""
     try:
+        # Clean audio
+        cleaned_audio = clean_audio(audio_file)
+        # Transcribe
+        transcription = transcribe_file(cleaned_audio)
+        # Diarize
+        diarization = diarize_segments(transcription["segments"])
+        # Process text
+        nlp = load_spacy_model()
+        processed_text = process_text(transcription, nlp, is_pediatrics)
+        return {
+            "transcription": transcription,
+            "diarization": diarization,
+            "processed_text": processed_text
+        }
     except Exception as e:
+        print(f"Error processing audio: {e}")
+        return {
+            "error": str(e),
+            "transcription": "",
+            "diarization": [],
+            "processed_text": ""
+        }
 # Ana arayüz
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=css) as demo: