Seicas committed on
Commit
14e4ceb
·
verified ·
1 Parent(s): f631cbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -52
app.py CHANGED
@@ -17,12 +17,13 @@ if not HF_TOKEN:
17
  print("Warning: HF_TOKEN not set!")
18
 
19
  def load_spacy_model():
 
20
  try:
21
- return spacy.load("tr_core_news_md")
22
  except OSError:
23
- print("Türkçe SpaCy modeli indiriliyor...")
24
- subprocess.run(["python", "-m", "spacy", "download", "tr_core_news_md"], check=True)
25
- return spacy.load("tr_core_news_md")
26
 
27
  # SpaCy modelini yükle
28
  nlp = load_spacy_model()
@@ -55,61 +56,36 @@ css = """
55
  .tips {background: #e7f5ff; padding: 15px; border-radius: 5px; margin-top: 20px;}
56
  """
57
 
58
- async def process_audio(audio_file, diarize=True, enhance=True, anonymize=True, progress=gr.Progress()):
 
59
  try:
60
- if audio_file is None:
61
- return {"error": "Lütfen bir ses dosyası yükleyin."}, None
62
 
63
- progress(0, desc="Ses dosyası hazırlanıyor...")
 
64
 
65
- # Ses dosyasını temizle
66
- if enhance:
67
- progress(0.1, desc="Ses iyileştiriliyor...")
68
- audio_file = clean_audio(audio_file)
69
-
70
- # Transkripsiyon yap
71
- progress(0.3, desc="Ses metne dönüştürülüyor...")
72
- result = transcribe_file(
73
- audio_file,
74
- language="tr",
75
- model_name=settings.ASR_MODEL
76
- )
77
 
78
- # Konuşmacı ayrımı
79
- if diarize:
80
- progress(0.6, desc="Konuşmacılar ayrıştırılıyor...")
81
- diarization_result = diarize_segments(result["segments"])
82
- result["diarization"] = diarization_result
83
-
84
- # Kişisel verileri anonimleştir
85
- if anonymize:
86
- progress(0.8, desc="Kişisel veriler anonimleştiriliyor...")
87
- privacy_processor = MedicalPrivacyProcessor()
88
- result["text"] = privacy_processor.anonymize_text(result["text"])
89
- result["anonymized"] = True
90
-
91
- # Sonucu formatla
92
- progress(0.9, desc="Sonuçlar hazırlanıyor...")
93
- formatted_text = ""
94
- if diarize and "diarization" in result:
95
- for segment in result["diarization"]:
96
- speaker = segment["speaker"]
97
- text = segment["text"]
98
- start = segment["start"]
99
- end = segment["end"]
100
- formatted_text += f"[{speaker}] ({start:.1f}s - {end:.1f}s): {text}\n\n"
101
- else:
102
- formatted_text = result["text"]
103
 
104
- if result.get("anonymized"):
105
- formatted_text += "\n🔒 Kişisel veriler anonimleştirildi."
106
-
107
- progress(1.0, desc="Tamamlandı!")
108
- return result, formatted_text
109
 
110
  except Exception as e:
111
- print(f"Error in process_audio: {str(e)}")
112
- return {"error": f"İşlem sırasında hata: {str(e)}"}, None
 
 
 
 
 
113
 
114
  # Ana arayüz
115
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=css) as demo:
 
17
  print("Warning: HF_TOKEN not set!")
18
 
19
  def load_spacy_model():
20
+ """Load SpaCy model with fallback to small model"""
21
  try:
22
+ return spacy.load(settings.SPACY_MODEL)
23
  except OSError:
24
+ print(f"Downloading {settings.SPACY_MODEL}...")
25
+ subprocess.run(["python", "-m", "spacy", "download", settings.SPACY_MODEL], check=True)
26
+ return spacy.load(settings.SPACY_MODEL)
27
 
28
  # SpaCy modelini yükle
29
  nlp = load_spacy_model()
 
56
  .tips {background: #e7f5ff; padding: 15px; border-radius: 5px; margin-top: 20px;}
57
  """
58
 
59
+ def process_audio(audio_file, is_pediatrics=True):
60
+ """Process audio with improved error handling"""
61
  try:
62
+ # Clean audio
63
+ cleaned_audio = clean_audio(audio_file)
64
 
65
+ # Transcribe
66
+ transcription = transcribe_file(cleaned_audio)
67
 
68
+ # Diarize
69
+ diarization = diarize_segments(transcription["segments"])
 
 
 
 
 
 
 
 
 
 
70
 
71
+ # Process text
72
+ nlp = load_spacy_model()
73
+ processed_text = process_text(transcription, nlp, is_pediatrics)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ return {
76
+ "transcription": transcription,
77
+ "diarization": diarization,
78
+ "processed_text": processed_text
79
+ }
80
 
81
  except Exception as e:
82
+ print(f"Error processing audio: {e}")
83
+ return {
84
+ "error": str(e),
85
+ "transcription": "",
86
+ "diarization": [],
87
+ "processed_text": ""
88
+ }
89
 
90
  # Ana arayüz
91
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=css) as demo: