Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -17,12 +17,13 @@ if not HF_TOKEN:
|
|
17 |
print("Warning: HF_TOKEN not set!")
|
18 |
|
19 |
def load_spacy_model():
|
|
|
20 |
try:
|
21 |
-
return spacy.load(
|
22 |
except OSError:
|
23 |
-
print("
|
24 |
-
subprocess.run(["python", "-m", "spacy", "download",
|
25 |
-
return spacy.load(
|
26 |
|
27 |
# SpaCy modelini yükle
|
28 |
nlp = load_spacy_model()
|
@@ -55,61 +56,36 @@ css = """
|
|
55 |
.tips {background: #e7f5ff; padding: 15px; border-radius: 5px; margin-top: 20px;}
|
56 |
"""
|
57 |
|
58 |
-
|
|
|
59 |
try:
|
60 |
-
|
61 |
-
|
62 |
|
63 |
-
|
|
|
64 |
|
65 |
-
#
|
66 |
-
|
67 |
-
progress(0.1, desc="Ses iyileştiriliyor...")
|
68 |
-
audio_file = clean_audio(audio_file)
|
69 |
-
|
70 |
-
# Transkripsiyon yap
|
71 |
-
progress(0.3, desc="Ses metne dönüştürülüyor...")
|
72 |
-
result = transcribe_file(
|
73 |
-
audio_file,
|
74 |
-
language="tr",
|
75 |
-
model_name=settings.ASR_MODEL
|
76 |
-
)
|
77 |
|
78 |
-
#
|
79 |
-
|
80 |
-
|
81 |
-
diarization_result = diarize_segments(result["segments"])
|
82 |
-
result["diarization"] = diarization_result
|
83 |
-
|
84 |
-
# Kişisel verileri anonimleştir
|
85 |
-
if anonymize:
|
86 |
-
progress(0.8, desc="Kişisel veriler anonimleştiriliyor...")
|
87 |
-
privacy_processor = MedicalPrivacyProcessor()
|
88 |
-
result["text"] = privacy_processor.anonymize_text(result["text"])
|
89 |
-
result["anonymized"] = True
|
90 |
-
|
91 |
-
# Sonucu formatla
|
92 |
-
progress(0.9, desc="Sonuçlar hazırlanıyor...")
|
93 |
-
formatted_text = ""
|
94 |
-
if diarize and "diarization" in result:
|
95 |
-
for segment in result["diarization"]:
|
96 |
-
speaker = segment["speaker"]
|
97 |
-
text = segment["text"]
|
98 |
-
start = segment["start"]
|
99 |
-
end = segment["end"]
|
100 |
-
formatted_text += f"[{speaker}] ({start:.1f}s - {end:.1f}s): {text}\n\n"
|
101 |
-
else:
|
102 |
-
formatted_text = result["text"]
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
|
110 |
except Exception as e:
|
111 |
-
print(f"Error
|
112 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
# Ana arayüz
|
115 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=css) as demo:
|
|
|
17 |
print("Warning: HF_TOKEN not set!")
|
18 |
|
19 |
def load_spacy_model():
    """Load the spaCy pipeline named by ``settings.SPACY_MODEL``.

    If ``spacy.load`` raises ``OSError``, the same model is downloaded via
    ``<interpreter> -m spacy download`` and the load is retried once. No
    alternative model is tried.

    Returns:
        Whatever ``spacy.load`` returns for the configured model.

    Raises:
        subprocess.CalledProcessError: If the download command exits with a
            non-zero status (``check=True``).
        OSError: If ``spacy.load`` still raises after the download.
    """
    import sys  # local import: only the download path needs it

    try:
        return spacy.load(settings.SPACY_MODEL)
    except OSError:
        print(f"Downloading {settings.SPACY_MODEL}...")
        # Use the interpreter that is running this app rather than whatever
        # "python" resolves to on PATH, so the model is installed into the
        # environment that the retry below will load it from.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", settings.SPACY_MODEL],
            check=True,
        )
        return spacy.load(settings.SPACY_MODEL)
|
27 |
|
28 |
# SpaCy modelini yükle
|
29 |
nlp = load_spacy_model()
|
|
|
56 |
.tips {background: #e7f5ff; padding: 15px; border-radius: 5px; margin-top: 20px;}
|
57 |
"""
|
58 |
|
59 |
+
def process_audio(audio_file, is_pediatrics=True):
    """Run the clean -> transcribe -> diarize -> text-processing pipeline.

    Args:
        audio_file: The audio input handed to ``clean_audio``. ``None`` is
            rejected up front and reported through the error result.
        is_pediatrics: Forwarded unchanged to ``process_text``; its exact
            meaning is defined there.

    Returns:
        dict: On success, the keys ``"transcription"`` (result of
        ``transcribe_file``), ``"diarization"`` (result of
        ``diarize_segments``) and ``"processed_text"`` (result of
        ``process_text``). On any failure, the same three keys hold empty
        placeholders (``""``, ``[]``, ``""``) and an additional ``"error"``
        key carries the exception message. Callers should check for
        ``"error"`` first, because ``"transcription"`` is an empty string in
        that case rather than a transcription result.
    """
    try:
        # Fail early with a clear message instead of an opaque error from
        # deeper in the pipeline when no audio was supplied.
        if audio_file is None:
            raise ValueError("No audio file provided")

        # Clean audio
        cleaned_audio = clean_audio(audio_file)

        # Transcribe
        transcription = transcribe_file(cleaned_audio)

        # Diarize
        diarization = diarize_segments(transcription["segments"])

        # Process text with the module-level spaCy pipeline (``nlp``), which
        # is loaded once at import time, instead of reloading the model on
        # every request.
        processed_text = process_text(transcription, nlp, is_pediatrics)

        return {
            "transcription": transcription,
            "diarization": diarization,
            "processed_text": processed_text,
        }

    except Exception as e:
        # Top-level boundary for the UI: report the failure and return a
        # result of the same shape so the caller does not crash.
        print(f"Error processing audio: {e}")
        return {
            "error": str(e),
            "transcription": "",
            "diarization": [],
            "processed_text": "",
        }
|
89 |
|
90 |
# Ana arayüz
|
91 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=css) as demo:
|