DeploySOAP / app.py
syafiqq02's picture
commit gradio new
4bfa09f
raw
history blame
24.1 kB
import gradio as gr
import threading
import os
import requests
import string
import pygame
import time
from pydub import AudioSegment
from nltk.tokenize import word_tokenize
import nltk
from nltk.corpus import words, stopwords
from dotenv import load_dotenv
# Initialize the pygame mixer used for notification sounds.
# A host without an audio device (for example a headless server) makes
# pygame.mixer.init() raise pygame.error; that must not stop the app from
# starting, so the failure is reported and startup continues. The playback
# helpers already catch and print their own errors.
try:
    pygame.mixer.init()
except pygame.error as exc:
    print(f"Warning: audio output unavailable, notification sounds disabled: {exc}")

# Download the NLTK resources (only fetched once, then cached locally).
# 'punkt_tab' is the tokenizer data that newer NLTK releases load for
# word_tokenize; 'punkt' is kept for older releases.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('words')
nltk.download('stopwords')

# Read the API endpoints from the environment (.env file supported).
load_dotenv()
API_TRANSCRIBE = os.getenv("API_TRANSCRIBE")
API_TEXT = os.getenv("API_TEXT")

# Paths to the sound files that are played (adjust to your own files).
NOTIFICATION_SOUND_PATH = "Berhenti.mp3"  # Played when recording has finished
START_RECORDING_SOUND_PATH = "Dimulai.mp3"  # Played when recording starts

# Vocabularies taken from the NLTK corpora.
english_words = set(words.words())
indonesian_stopwords = set(stopwords.words('indonesian'))
def load_indonesian_wordlist(filepath='wordlist.lst'):
    """Read a one-entry-per-line word list into a set of lowercase words.

    The file is decoded as UTF-8 first; if that raises a decode error it is
    read again as Latin-1. Surrounding whitespace is trimmed and blank lines
    are skipped.

    Args:
        filepath: Path of the word list file.

    Returns:
        The set of lowercase entries, or an empty set when the file cannot be
        read (for example when it does not exist).
    """
    for codec in ('utf-8', 'latin-1'):
        try:
            with open(filepath, encoding=codec) as handle:
                entries = set()
                for raw_line in handle:
                    entry = raw_line.strip()
                    if entry:
                        entries.add(entry.lower())
                return entries
        except UnicodeDecodeError:
            # Wrong encoding guess: try the next candidate, if any.
            continue
        except Exception:
            return set()
    return set()
# Indonesian vocabulary read from the default 'wordlist.lst' file; this is an
# empty set when the file cannot be read.
indonesian_words = load_indonesian_wordlist()
# Combined English + Indonesian vocabulary.
valid_words = english_words.union(indonesian_words)
def contains_medical_terms_auto_threshold(text, medical_words):
    """Decide whether enough of the words in *text* appear in *medical_words*.

    The text is lowercased and tokenized; only purely alphabetic tokens are
    counted. The required share of matching tokens depends on the length:
    0.4 for texts of up to five counted tokens, 0.1 for longer ones.

    Args:
        text: The text to inspect.
        medical_words: Container of lowercase words treated as medical terms.

    Returns:
        True when the share of matching tokens reaches the threshold, False
        otherwise (including when no alphabetic token is found).
    """
    counted = []
    for token in word_tokenize(text.lower()):
        if token.isalpha():
            # The token is already purely alphabetic here, so this strip
            # leaves it unchanged; it is kept to mirror the existing logic.
            counted.append(token.strip(string.punctuation))
    if not counted:
        return False

    hits = 0
    for word in counted:
        if word in medical_words:
            hits += 1

    required_ratio = 0.1 if len(counted) > 5 else 0.4
    return hits / len(counted) >= required_ratio
# Vocabulary passed to the medical-term check; it is loaded from the same
# 'wordlist.lst' file that load_indonesian_wordlist() reads by default.
medical_words = load_indonesian_wordlist('wordlist.lst')
# Longest audio duration accepted for processing, in seconds.
MAX_DURATION_SECONDS = 600
def validate_audio_duration(audio_file):
    """Check that an audio file can be read and is not too long.

    Args:
        audio_file: Path (or file object) accepted by AudioSegment.from_file.

    Returns:
        A pair (is_valid, duration_seconds). is_valid is False when the clip
        is longer than MAX_DURATION_SECONDS. When the file cannot be loaded
        the result is (False, -1).
    """
    try:
        clip = AudioSegment.from_file(audio_file)
        # len() of the segment is divided by 1000 to get seconds.
        seconds = len(clip) / 1000.0
        within_limit = not seconds > MAX_DURATION_SECONDS
        return within_limit, seconds
    except Exception:
        return False, -1
def play_notification_sound():
    """Play the notification sound that signals a finished recording.

    Prints a warning when the sound file is missing and prints the error
    instead of raising when playback fails.
    """
    try:
        if not os.path.exists(NOTIFICATION_SOUND_PATH):
            print(f"⚠️ File completion sound tidak ditemukan: {NOTIFICATION_SOUND_PATH}")
            return
        mixer_music = pygame.mixer.music
        mixer_music.load(NOTIFICATION_SOUND_PATH)
        mixer_music.play()
        print("πŸ”Š Playing completion notification sound...")
    except Exception as e:
        print(f"❌ Error playing completion sound: {e}")
def play_start_recording_sound():
    """Play the sound that signals the start of a recording.

    Prints a warning when the sound file is missing and prints the error
    instead of raising when playback fails.
    """
    try:
        if not os.path.exists(START_RECORDING_SOUND_PATH):
            print(f"⚠️ File start recording sound tidak ditemukan: {START_RECORDING_SOUND_PATH}")
            return
        # A Sound object is used (rather than mixer.music) so this can play
        # alongside the music channel without interrupting it.
        pygame.mixer.Sound(START_RECORDING_SOUND_PATH).play()
        print("🎡 Playing start recording sound...")
    except Exception as e:
        print(f"❌ Error playing start recording sound: {e}")
def start_recording():
    """Handle the start of a recording.

    Starts the start-of-recording sound on a daemon thread so the handler
    returns immediately.

    Returns:
        The status text shown while recording is in progress.
    """
    print("πŸŽ™οΈ Recording started...")
    sound_thread = threading.Thread(target=play_start_recording_sound, daemon=True)
    sound_thread.start()
    return "πŸŽ™οΈ Sedang merekam... Klik stop untuk menyelesaikan"
def stop_recording(audio):
    """Handle the end of a recording.

    Args:
        audio: The recorded audio value, or None when nothing was recorded.

    Returns:
        The status text describing whether a recording is available.
    """
    if audio is None:
        print("❌ No audio recorded")
        return "❌ Tidak ada audio yang direkam"

    print("βœ… Recording completed!")
    # The completion sound runs on a daemon thread so the handler does not
    # wait for playback.
    sound_thread = threading.Thread(target=play_notification_sound, daemon=True)
    sound_thread.start()
    return "βœ… Recording selesai! Audio siap diproses"
def test_microphone():
    """Log a microphone test and return the matching status text."""
    status_text = "πŸ”§ Testing microphone... Silakan coba record lagi"
    print("πŸ”§ Testing microphone...")
    return status_text
def reset_recording_status():
    """Return the status text shown when the recorder is idle and ready."""
    ready_text = "πŸ“± Siap untuk merekam - Klik tombol record"
    return ready_text
# (connect, read) timeouts in seconds for the transcription request. Without
# a timeout a stalled server would block the handler indefinitely. The read
# limit is generous because processing a long recording can take a while.
_TRANSCRIBE_TIMEOUT = (10, 900)


def handle_audio(audio_file):
    """Validate an audio file and send it to the transcription API.

    Args:
        audio_file: Path of the audio file, or None when nothing was provided.

    Returns:
        A 4-tuple (validation_message, transcript, soap, tags). On success the
        validation message is empty. On any failure it holds the message to
        show and the other three items are empty strings.
    """
    if audio_file is None:
        return "❌ Tidak ada file audio", "", "", ""

    valid, duration = validate_audio_duration(audio_file)
    if not valid:
        # A duration of -1 means the file could not be loaded at all.
        if duration == -1:
            msg = "⚠️ Gagal memproses file audio."
        else:
            msg = f"⚠️ Durasi rekaman terlalu panjang ({duration:.1f}s). Maksimal {MAX_DURATION_SECONDS}s."
        return msg, "", "", ""

    try:
        with open(audio_file, "rb") as f:
            response = requests.post(
                API_TRANSCRIBE,
                files={"audio": f},
                timeout=_TRANSCRIBE_TIMEOUT,
            )
        # Report HTTP errors explicitly instead of trying to parse an error
        # page as a result.
        response.raise_for_status()
        result = response.json()
        # "or ''" also covers fields that are present but null.
        transcription = result.get("transcription") or ""
        soap_content = result.get("soap_content") or ""
        tags_content = result.get("tags_content") or ""
        if not transcription and not soap_content and not tags_content:
            return "⚠️ Tidak ada hasil dari proses audio", "", "", ""
        return "", transcription, soap_content, tags_content
    except Exception as e:
        return f"❌ Error processing audio: {str(e)}", "", "", ""
# (connect, read) timeouts in seconds for the text-processing request.
# Without a timeout a stalled server would block the handler indefinitely.
_TEXT_TIMEOUT = (10, 300)


def handle_text(dialogue):
    """Validate a dialogue text and send it to the text-processing API.

    Args:
        dialogue: The doctor-patient conversation text. None is treated the
            same as an empty text.

    Returns:
        A 4-tuple (validation_message, transcript, soap, tags). On success the
        validation message is empty and the transcript is the input text. On
        any failure the message is set and the other items are empty strings.
    """
    # Guard against None as well as blank text so the check never raises.
    if dialogue is None or not dialogue.strip():
        return "⚠️ Teks tidak boleh kosong", "", "", ""
    if not contains_medical_terms_auto_threshold(dialogue, medical_words):
        return "⚠️ Teks tidak mengandung istilah medis yang cukup untuk diproses.", "", "", ""

    try:
        response = requests.post(
            API_TEXT,
            json={"dialogue": dialogue},
            timeout=_TEXT_TIMEOUT,
        )
        # Report HTTP errors explicitly instead of trying to parse an error
        # page as a result.
        response.raise_for_status()
        result = response.json()
        # "or ''" also covers fields that are present but null.
        soap_content = result.get("soap_content") or ""
        tags_content = result.get("tags_content") or ""
        if not soap_content and not tags_content:
            return "⚠️ Tidak ada hasil dari proses teks", "", "", ""
        return "", dialogue, soap_content, tags_content
    except Exception as e:
        return f"❌ Error processing text: {str(e)}", "", "", ""
def toggle_inputs_with_refresh(choice):
    """Build the component updates for a newly selected input method.

    Shows the input and validation components that belong to *choice*, hides
    the others, and clears the input and result values.

    Args:
        choice: The selected input method label.

    Returns:
        A tuple of eleven gr.update objects, in the order noted inline.
    """
    upload_selected = choice == "Upload Audio"
    realtime_selected = choice == "Realtime Recording"
    text_selected = choice == "Input Teks"

    input_updates = (
        gr.update(visible=upload_selected, value=None),    # audio upload
        gr.update(visible=realtime_selected, value=None),  # audio record
        gr.update(visible=text_selected, value=""),        # text input
    )
    visibility_updates = (
        gr.update(visible=upload_selected),    # upload validation
        gr.update(visible=realtime_selected),  # realtime validation
        gr.update(visible=text_selected),      # text validation
        gr.update(visible=realtime_selected),  # recording status group
        gr.update(visible=realtime_selected),  # record audio group
    )
    result_updates = (
        gr.update(value=""),  # transcript
        gr.update(value=""),  # soap
        gr.update(value=""),  # tags
    )
    return input_updates + visibility_updates + result_updates
def clear_all_data():
    """Build the component updates that reset every input and output.

    Returns:
        A tuple of ten gr.update objects, one per component, in the order of
        the values listed below.
    """
    reset_values = (
        None,  # audio_upload
        None,  # audio_record
        "",    # text_input
        "",    # validation_upload
        "",    # validation_realtime
        "",    # validation_text
        "πŸ“± Siap untuk merekam",  # recording_status
        "",    # transcript_output
        "",    # soap_output
        "",    # tags_output
    )
    return tuple(gr.update(value=new_value) for new_value in reset_values)
def process_data(choice, audio_upload, audio_record, text_input):
    """Run the handler that matches the selected input method.

    Args:
        choice: The selected input method label.
        audio_upload: Value of the upload audio component.
        audio_record: Value of the recording audio component.
        text_input: Value of the text input component.

    Returns:
        A 6-tuple (validation_upload, validation_realtime, validation_text,
        transcript, soap, tags). Only the validation slot that belongs to the
        selected method can be non-empty; an unknown choice yields six empty
        strings.
    """
    # Pick the handler result and the index of the validation slot it fills.
    if choice == "Upload Audio":
        slot, outcome = 0, handle_audio(audio_upload)
    elif choice == "Realtime Recording":
        slot, outcome = 1, handle_audio(audio_record)
    elif choice == "Input Teks":
        # For text input the transcript is the input text itself.
        slot, outcome = 2, handle_text(text_input)
    else:
        # Default case - clear everything.
        return ("", "", "", "", "", "")

    validation_msg, transcript, soap, tags = outcome
    validations = ["", "", ""]
    validations[slot] = validation_msg
    return (validations[0], validations[1], validations[2], transcript, soap, tags)
# Custom CSS for the modern look, with corrected alignment.
# The value is passed to gr.Blocks(css=...), which expects a plain CSS string.
# It must therefore not be wrapped in <style> tags: a literal "<style>" would
# be read as part of the first selector and make that rule invalid.
modern_css = """
/* Background gradient yang modern */
.gradio-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
/* Header styling */
.main-header {
text-align: center;
padding: 2rem 0;
color: white;
margin-bottom: 2rem;
}
.main-header h1 {
font-size: 3rem;
font-weight: 700;
margin-bottom: 0.5rem;
text-shadow: 0 2px 4px rgba(0,0,0,0.3);
}
.main-header p {
font-size: 1.2rem;
opacity: 0.9;
font-weight: 300;
}
/* Card styling */
.input-card {
background: rgba(255, 255, 255, 0.95);
border-radius: 20px;
padding: 2rem;
margin: 0.8rem 0;
box-shadow: 0 8px 32px rgba(0,0,0,0.1);
backdrop-filter: blur(10px);
border: 1px solid rgba(255,255,255,0.2);
}
.output-card {
background: rgba(255, 255, 255, 0.95);
border-radius: 20px;
padding: 2rem;
box-shadow: 0 8px 32px rgba(0,0,0,0.1);
backdrop-filter: blur(10px);
border: 1px solid rgba(255,255,255,0.2);
}
/* Record audio section with padding */
.record-audio-section {
background: rgba(255, 255, 255, 0.95);
border-radius: 20px;
padding: 2rem;
margin: 0.8rem 0;
box-shadow: 0 8px 32px rgba(0,0,0,0.1);
backdrop-filter: blur(10px);
border: 1px solid rgba(255,255,255,0.2);
}
/* Output header styling - aligned properly */
.output-header {
text-align: left;
margin-bottom: 1.5rem;
padding-left: 0;
}
.output-header h3 {
font-size: 1.5rem;
font-weight: 600;
color: #2d3436;
margin: 0;
display: flex;
align-items: center;
}
/* Spacing adjustments */
.input-section {
margin-bottom: 1rem;
}
.button-section {
margin-top: 0.5rem;
margin-bottom: 1rem;
}
/* Updated output section styling with proper alignment */
.output-section {
background: rgba(255, 255, 255, 0.98);
border-radius: 15px;
padding: 1.5rem;
margin: 1rem 0 0 0;
box-shadow: 0 4px 20px rgba(0,0,0,0.08);
}
/* Ensure all output components are aligned consistently */
.output-container {
display: flex;
flex-direction: column;
gap: 1rem;
}
/* Button styling */
.record-btn {
background: linear-gradient(45deg, #ff6b6b, #ee5a24);
border: none;
border-radius: 50px;
padding: 1rem 2rem;
color: white;
font-weight: 600;
font-size: 1.1rem;
cursor: pointer;
transition: all 0.3s ease;
box-shadow: 0 4px 15px rgba(238, 90, 36, 0.4);
}
.record-btn:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(238, 90, 36, 0.6);
}
.test-btn {
background: linear-gradient(45deg, #74b9ff, #0984e3);
border: none;
border-radius: 50px;
padding: 0.8rem 1.5rem;
color: white;
font-weight: 500;
cursor: pointer;
transition: all 0.3s ease;
}
.test-btn:hover {
transform: translateY(-1px);
box-shadow: 0 4px 15px rgba(116, 185, 255, 0.4);
}
.reset-btn {
background: linear-gradient(45deg, #a29bfe, #6c5ce7);
border: none;
border-radius: 15px;
padding: 0.4rem 1rem;
font-size: 0.8rem;
color: white;
font-weight: 500;
cursor: pointer;
transition: all 0.3s ease;
height: 2rem;
min-width: 80px;
}
.reset-btn:hover {
transform: translateY(-1px);
box-shadow: 0 2px 10px rgba(162, 155, 254, 0.4);
}
.process-btn {
background: linear-gradient(45deg, #00b894, #00cec9);
border: none;
border-radius: 50px;
padding: 1rem 2.5rem;
color: white;
font-weight: 600;
font-size: 1.2rem;
cursor: pointer;
transition: all 0.3s ease;
box-shadow: 0 4px 15px rgba(0, 184, 148, 0.4);
margin: 1rem 0;
}
.process-btn:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(0, 184, 148, 0.6);
}
/* Status indicator */
.status-indicator {
background: rgba(255, 255, 255, 0.95);
border: 2px solid #74b9ff;
color: #2d3436;
padding: 2rem;
border-radius: 15px;
font-weight: 500;
margin: 1rem 0;
box-shadow: 0 4px 15px rgba(116, 185, 255, 0.2);
}
/* Input styling */
.audio-input {
border: 2px dashed #ddd;
border-radius: 15px;
padding: 2rem;
text-align: center;
background: rgba(255,255,255,0.5);
transition: all 0.3s ease;
}
.audio-input:hover {
border-color: #74b9ff;
background: rgba(116, 185, 255, 0.1);
}
/* Ensure proper text alignment in outputs */
.output-section textarea {
text-align: left;
vertical-align: top;
resize: vertical;
}
/* Responsive design */
@media (max-width: 768px) {
.main-header h1 {
font-size: 2rem;
}
.input-card, .output-card, .record-audio-section {
padding: 2rem;
}
.record-btn, .process-btn {
width: 100%;
margin: 0.5rem 0;
}
.reset-btn {
min-width: 70px;
font-size: 0.75rem;
}
}
/* Animation untuk elemen yang muncul */
@keyframes fadeInUp {
from {
opacity: 0;
transform: translateY(30px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.input-card, .output-card, .record-audio-section {
animation: fadeInUp 0.6s ease-out;
}
/* Microphone icon animation */
@keyframes pulse {
0% {
transform: scale(1);
}
50% {
transform: scale(1.05);
}
100% {
transform: scale(1);
}
}
.recording-active {
animation: pulse 1s infinite;
}
"""
# Build the interface with a modern theme.
# Components are created in display order inside the Blocks context; the
# event handlers at the end wire them to the functions defined above.
with gr.Blocks(
    title="🩺 SOAP AI - Modern Interface",
    css=modern_css,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="cyan",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter")
    )
) as app:
    # Header
    gr.HTML("""
<div class="main-header">
<h1>πŸŽ™οΈ Realtime Recording</h1>
<p>High Quality Audio Recording with Smart Notifications</p>
</div>
""")
    # Input method selector next to the clear button.
    with gr.Row():
        with gr.Column(scale=8):
            input_choice = gr.Dropdown(
                choices=["Upload Audio", "Realtime Recording", "Input Teks"],
                value="Realtime Recording",
                label="🎯 Pilih Metode Input",
                container=True,
                elem_classes=["input-dropdown"]
            )
        with gr.Column(scale=2):
            clear_button = gr.Button(
                "πŸ—‘οΈ Clear",
                variant="secondary",
                size="sm",
                elem_classes=["reset-btn"]
            )
    # Input Section - Upload Audio (hidden until selected)
    with gr.Group(elem_classes=["input-card"], visible=False) as upload_audio_group:
        gr.HTML("<h3>πŸ“ Upload Audio File</h3>")
        audio_upload = gr.Audio(
            sources=["upload"],
            label="πŸ“ Upload File Audio",
            type="filepath",
            show_download_button=False,
            show_share_button=False,
            interactive=True,
            elem_classes=["audio-input"]
        )
    # Input Section - Record Audio with proper padding (visible by default,
    # matching the dropdown's initial value)
    with gr.Group(elem_classes=["record-audio-section"], visible=True) as record_audio_group:
        gr.HTML("<h3>🎡 Record Your Audio</h3>")
        audio_record = gr.Audio(
            sources=["microphone"],
            label="πŸŽ™οΈ Realtime Recording",
            type="filepath",
            show_download_button=True,
            show_share_button=False,
            interactive=True,
            streaming=False,
            elem_classes=["audio-input"]
        )
    # Input Section - Text Input (without record audio section)
    with gr.Group(elem_classes=["input-card"], visible=False) as text_input_group:
        gr.HTML("<h3>πŸ“ Input Teks</h3>")
        text_input = gr.Textbox(
            label="πŸ“ Masukkan Percakapan Dokter-Pasien",
            lines=6,
            placeholder="Ketik percakapan antara dokter dan pasien di sini...",
            elem_classes=["text-input"]
        )
    # Status Section - only for Realtime Recording
    recording_status_group = gr.Group(elem_classes=["status-indicator"], visible=True)
    with recording_status_group:
        gr.HTML("<h4>πŸ“Š Status Recording</h4>")
        recording_status = gr.Textbox(
            value="Siap untuk merekam",
            interactive=False,
            show_label=False,
            lines=1,
            elem_classes=["status-display"]
        )
    # Validation Section: one message box per input method; only the one for
    # the default method starts visible.
    validation_upload = gr.Textbox(
        label="⚠️ Validasi Upload Audio",
        lines=1,
        interactive=False,
        visible=False,
        elem_classes=["validation-msg"]
    )
    validation_realtime = gr.Textbox(
        label="⚠️ Validasi Realtime Recording",
        lines=1,
        interactive=False,
        visible=True,
        elem_classes=["validation-msg"]
    )
    validation_text = gr.Textbox(
        label="⚠️ Validasi Input Teks",
        lines=1,
        interactive=False,
        visible=False,
        elem_classes=["validation-msg"]
    )
    # Process Button
    process_button = gr.Button(
        "πŸš€ Proses ke SOAP",
        variant="primary",
        size="lg",
        elem_classes=["process-btn"]
    )
    # Output Section with proper alignment - Fixed Layout
    with gr.Group(elem_classes=["output-card"]):
        # Header aligned properly
        with gr.Row():
            with gr.Column():
                gr.HTML('<div class="output-header"><h3>πŸ“‹ Hasil Analisis</h3></div>')
        # All outputs in aligned container
        with gr.Column(elem_classes=["output-container"]):
            transcript_output = gr.Textbox(
                label="πŸ“ Hasil Transkripsi",
                lines=4,
                elem_classes=["output-section"]
            )
            soap_output = gr.Textbox(
                label="πŸ“‹ Ringkasan SOAP",
                lines=8,
                elem_classes=["output-section"]
            )
            tags_output = gr.Textbox(
                label="🏷️ Medical Tags",
                lines=6,
                elem_classes=["output-section"]
            )
    # Footer
    gr.HTML("""
<div style="text-align: center; padding: 2rem; color: rgba(255,255,255,0.7);">
<p>Use via API πŸ”₯ β€’ Built with Gradio πŸš€</p>
</div>
""")
    # Event handler for switching the input method: shows the group and
    # validation box of the selected method, hides the others, and clears the
    # three result boxes. The tuple order must match the outputs list below.
    input_choice.change(
        fn=lambda choice: (
            gr.update(visible=(choice == "Upload Audio")),  # upload_audio_group
            gr.update(visible=(choice == "Realtime Recording")),  # record_audio_group
            gr.update(visible=(choice == "Input Teks")),  # text_input_group
            gr.update(visible=(choice == "Upload Audio")),  # validation_upload
            gr.update(visible=(choice == "Realtime Recording")),  # validation_realtime
            gr.update(visible=(choice == "Input Teks")),  # validation_text
            gr.update(visible=(choice == "Realtime Recording")),  # recording_status_group
            gr.update(value=""),  # transcript
            gr.update(value=""),  # soap
            gr.update(value=""),  # tags
        ),
        inputs=input_choice,
        outputs=[
            upload_audio_group,
            record_audio_group,
            text_input_group,
            validation_upload,
            validation_realtime,
            validation_text,
            recording_status_group,
            transcript_output,
            soap_output,
            tags_output,
        ],
    )
    # Event handlers for recording: update the status text on start and stop.
    audio_record.start_recording(
        fn=start_recording,
        outputs=recording_status
    )
    audio_record.stop_recording(
        fn=stop_recording,
        inputs=audio_record,
        outputs=recording_status
    )
    # Clear button: the outputs order must match the tuple returned by
    # clear_all_data.
    clear_button.click(
        fn=clear_all_data,
        outputs=[
            audio_upload,
            audio_record,
            text_input,
            validation_upload,
            validation_realtime,
            validation_text,
            recording_status,
            transcript_output,
            soap_output,
            tags_output,
        ],
    )
    # Process button: the outputs order must match the 6-tuple returned by
    # process_data.
    process_button.click(
        fn=process_data,
        inputs=[input_choice, audio_upload, audio_record, text_input],
        outputs=[
            validation_upload,
            validation_realtime,
            validation_text,
            transcript_output,
            soap_output,
            tags_output,
        ],
        show_progress="minimal",
    )
# Startup information: print setup hints, report which notification sound
# files are present, then launch the app.
if __name__ == "__main__":
    setup_lines = (
        "πŸš€ Starting Enhanced SOAP AI Application with Modern UI...",
        "πŸ“‹ Setup Instructions:",
        "1. Install dependencies: pip install gradio pygame pydub nltk requests python-dotenv",
        "2. Place your sound files:",
        f" - Start recording sound: {START_RECORDING_SOUND_PATH}",
        f" - Completion sound: {NOTIFICATION_SOUND_PATH}",
        "3. Supported sound formats: WAV, MP3, OGG",
        "4. Make sure wordlist.lst file is available",
        "5. Set up your .env file with API_TRANSCRIBE and API_TEXT",
    )
    for setup_line in setup_lines:
        print(setup_line)
    print()

    # Check whether the sound files exist.
    start_found = os.path.exists(START_RECORDING_SOUND_PATH)
    if start_found:
        print(f"βœ… Start recording sound found: {START_RECORDING_SOUND_PATH}")
    else:
        print(f"⚠️ Start recording sound not found: {START_RECORDING_SOUND_PATH}")

    completion_found = os.path.exists(NOTIFICATION_SOUND_PATH)
    if completion_found:
        print(f"βœ… Completion sound found: {NOTIFICATION_SOUND_PATH}")
    else:
        print(f"⚠️ Completion sound not found: {NOTIFICATION_SOUND_PATH}")

    sounds_status = [
        sound_name
        for sound_name, found in (("start", start_found), ("completion", completion_found))
        if found
    ]
    if len(sounds_status) == 0:
        print("πŸ“ Note: Add sound files to enable audio notifications for realtime recording")
    elif len(sounds_status) == 1:
        print("πŸ“ Note: Add the missing sound file for complete audio experience")

    print("\n🌐 Application will start at: http://localhost:7860")
    print("πŸŽ™οΈ Make sure to allow microphone access when using Realtime Recording!")
    print("✨ New Modern UI with enhanced visual experience!")
    print()
    app.launch()