import torch
import gradio as gr
from transformers import pipeline

# Whisper checkpoints offered in the model dropdown.
models = ["openai/whisper-small", "openai/whisper-base", "openai/whisper-medium", "openai/whisper-large"]

# Default ASR pipeline; run on the GPU when one is available.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=models[0],
    device="cuda" if torch.cuda.is_available() else "cpu",
)


def initialize_pipeline(model_name):
    """Rebuild the global ASR pipeline when a different Whisper checkpoint is selected."""
    global pipe
    pipe = pipeline(
        task="automatic-speech-recognition",
        model=model_name,
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
    return model_name


def transcribe(audio):
    """Transcribe the recorded or uploaded audio file with the current pipeline."""
    if audio is None:
        return "No audio input received. Please try again."
    text = pipe(audio)["text"]
    return text


# Blocks UI: model selector, audio input, transcription output, and a transcribe button.
with gr.Blocks() as interface:
    gr.Markdown(
        "# Whisper Speech Recognition\n"
        "Real-time speech recognition demo using OpenAI's Whisper models."
    )

    model_dropdown = gr.Dropdown(choices=models, value=models[0], label="Select Model")
    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload or Record Audio")
    text_output = gr.Textbox(label="Transcribed Text")
    transcribe_button = gr.Button("Transcribe")

    # Reload the pipeline whenever a different model is selected.
    model_dropdown.change(fn=initialize_pipeline, inputs=model_dropdown, outputs=None)
    # Run transcription on the provided audio when the button is clicked.
    transcribe_button.click(fn=transcribe, inputs=[audio_input], outputs=text_output)


if __name__ == "__main__":
    interface.launch()