import gradio as gr
from transformers import pipeline, BertTokenizer, EncoderDecoderModel
import torch

# Load Whisper for speech-to-text
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
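# Note: the ASR pipeline runs on CPU by default; device=0 can be passed to pipeline() to use a GPU.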

# Load your fine-tuned summarization model
model_name = "Imsachinsingh00/bert2bert-mts-summary"
tokenizer = BertTokenizer.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = EncoderDecoderModel.from_pretrained(model_name).to(device)

# Summarization function
def summarize_text(text):
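    # BERT's encoder accepts at most 512 tokens, so longer inputs are truncated below.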
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
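    # Greedy decoding with a 64-token cap; num_beams could be passed to generate() for beam search.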
    outputs = model.generate(**inputs, max_length=64)
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

# Pipeline: audio → transcription
def transcribe(audio):
    # Guard against an empty recording (Gradio passes None if nothing was recorded)
    if audio is None:
        return ""
    return asr(audio)["text"]

# App UI
with gr.Blocks() as demo:
    gr.Markdown("## 🩺 Medical Dialogue Summarizer")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎙️ Record Dialogue")  # Gradio 4.x; 3.x used source="microphone"
            transcribed_text = gr.Textbox(lines=10, label="📝 Transcribed Text (editable)")

            record_button = gr.Button("🎧 Transcribe")
            record_button.click(transcribe, inputs=audio_input, outputs=transcribed_text)

        with gr.Column():
            summary_output = gr.Textbox(lines=10, label="📋 Summary (output)", interactive=False)
            summarize_button = gr.Button("✂️ Summarize")
            summarize_button.click(summarize_text, inputs=transcribed_text, outputs=summary_output)

    gr.Markdown("Built for Voize Interview β€” Powered by Whisper + BERT")

demo.launch()
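# demo.launch(share=True) would additionally create a temporary public URL.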