File size: 832 Bytes
35f8a26
c9adb84
35f8a26
c9adb84
35f8a26
 
c9adb84
35f8a26
c9adb84
 
 
 
 
 
35f8a26
c9adb84
 
 
 
 
 
35f8a26
c9adb84
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import gradio as gr
from nemo.collections.asr.models import EncDecMultiTaskModel

# Checkpoint identifier handed to NeMo's pretrained-model loader.
_CANARY_MODEL_ID = "nvidia/canary-1b"

# Instantiate the model once at import time so every transcription request
# reuses the same object instead of reloading it.
canary_model = EncDecMultiTaskModel.from_pretrained(_CANARY_MODEL_ID)

# Define ASR function
def transcribe_audio(audio):
    """Transcribe one recorded audio clip with the module-level Canary model.

    Args:
        audio: The value the UI hands to the callback. Either an object
            exposing the recording's path as ``.name`` (e.g. a temporary
            file), a plain path string, or ``None`` when no audio was
            provided.

    Returns:
        The first element of the model's transcription result for the clip,
        or an empty string when ``audio`` is ``None``.
    """
    # Nothing was recorded/uploaded: return an empty transcription instead
    # of failing with AttributeError on ``None.name``.
    if audio is None:
        return ""

    # Accept a bare path as well as a file-like object so the callback works
    # whichever of the two the audio input is configured to deliver.
    audio_path = audio if isinstance(audio, str) else audio.name

    # Perform transcription
    predicted_text = canary_model.transcribe(
        paths2audio_files=[audio_path],
        batch_size=16,  # Batch size for inference
    )
    # A single path was submitted, so the first result belongs to this clip.
    return predicted_text[0]

# Interface
# NOTE: "microphone" is a value for `source`, not for `type`. The callback
# reads the recording's location from `audio.name`, so the input has to
# deliver a temporary file object, i.e. type="file".
inputs = gr.inputs.Audio(source="microphone", label="Speak into the microphone", type="file")
outputs = gr.outputs.Textbox(label="Transcription")
title = "Canary ASR"
description = "Transcribe speech from the microphone using the NeMo Canary ASR model."
interface = gr.Interface(transcribe_audio, inputs, outputs, title=title, description=description)

# Launch interface
interface.launch()