import gradio as gr

from nemo.collections.asr.models import EncDecMultiTaskModel

# Load the pretrained Canary checkpoint once, at import time, so that every
# call to transcribe_audio reuses the same module-level model instance.
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
def transcribe_audio(audio):
    """Transcribe one recorded audio clip with the module-level Canary model.

    Args:
        audio: The recording handed over by the Gradio audio input. May be a
            file-like object exposing its on-disk location via ``.name``, a
            plain path string, or ``None`` when nothing was recorded.

    Returns:
        The first entry of the list returned by ``canary_model.transcribe``,
        or an empty string when ``audio`` is ``None``.
    """
    if audio is None:
        # Nothing was recorded; avoid an AttributeError on ``None.name``.
        return ""

    # File-like objects carry their path in ``.name``; anything without that
    # attribute (e.g. a path string) is passed through unchanged.
    audio_path = getattr(audio, "name", audio)

    predicted_text = canary_model.transcribe(
        paths2audio_files=[audio_path],
        batch_size=16,
    )
    # A single file was submitted, so the single result is the first entry.
    return predicted_text[0]
# Microphone capture is selected through ``source``. ``type`` controls what
# the callback receives; "file" yields an object whose path is read through
# ``.name``, which is how transcribe_audio accesses the recording.
# NOTE(review): valid ``type`` values depend on the installed Gradio release
# ("microphone" is not one of them) -- confirm against the pinned version.
inputs = gr.inputs.Audio(source="microphone", label="Speak into the microphone", type="file")
outputs = gr.outputs.Textbox(label="Transcription")
title = "Canary ASR"
description = "Transcribe speech from the microphone using the NeMo Canary ASR model."

# Wire the transcription callback to the audio input and the text output.
interface = gr.Interface(
    transcribe_audio,
    inputs,
    outputs,
    title=title,
    description=description,
)

# Start the web UI.
interface.launch()