File size: 2,053 Bytes
3568733
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM

# Hub checkpoint identifiers, grouped so they are easy to audit or swap.
_ASR_MODEL_ID = "ashutoshpattnaik50/whisper-small-odia-finetuned"
_ASR_PROCESSOR_ID = "openai/whisper-small"
_TRANSLATOR_ID = "ai4bharat/indictrans2-en"

# Speech recognition: fine-tuned Odia Whisper weights.
asr_model = WhisperForConditionalGeneration.from_pretrained(_ASR_MODEL_ID)
# NOTE(review): the processor comes from the base checkpoint rather than the
# fine-tuned repository - presumably the fine-tune kept the base feature
# extractor and tokenizer; confirm.
processor = WhisperProcessor.from_pretrained(_ASR_PROCESSOR_ID)

# Translation: IndicTrans2 model and its tokenizer, used for Odia -> English.
# NOTE(review): verify that this repository id exists on the Hub. Published
# IndicTrans2 checkpoints appear to use names such as
# "ai4bharat/indictrans2-indic-en-1B" and may need trust_remote_code=True -
# TODO confirm against the model card.
translator = AutoModelForSeq2SeqLM.from_pretrained(_TRANSLATOR_ID)
translator_tokenizer = AutoTokenizer.from_pretrained(_TRANSLATOR_ID)

# Transcription + Translation function
def transcribe_and_translate(audio_path):
    """Transcribe an Odia recording and translate the transcript to English.

    Args:
        audio_path: Filesystem path (str) of the audio file to process, or
            ``None``/empty string when nothing was recorded or uploaded.

    Returns:
        A ``(transcription, translation)`` tuple of stripped strings. Both
        are empty when no audio was supplied or when the recognizer produced
        no text.
    """
    # The UI can invoke the callback without any audio; answer with empty
    # outputs instead of failing inside the audio loader.
    if not audio_path:
        return "", ""

    # Imported lazily so the module can be imported without torchaudio; only
    # an actual transcription request needs it.
    import torchaudio

    target_rate = 16000  # sampling rate handed to the Whisper processor

    waveform, sampling_rate = torchaudio.load(audio_path)

    # torchaudio yields (channels, frames). Average the channels so that a
    # stereo file is treated as one mono signal rather than as several
    # separate inputs.
    if waveform.dim() > 1 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sampling_rate != target_rate:
        resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
        waveform = resampler(waveform)

    # reshape(-1) always gives a 1-D array (squeeze() would collapse a
    # single-sample clip to 0-d).
    # NOTE(review): Whisper's feature extractor works on fixed-length windows,
    # so long recordings may be cut off - confirm whether chunking is needed.
    input_features = processor(
        waveform.reshape(-1).numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
    ).input_features

    predicted_ids = asr_model.generate(input_features)
    transcription = processor.batch_decode(
        predicted_ids, skip_special_tokens=True
    )[0]

    # Nothing recognized: there is nothing to translate.
    if not transcription.strip():
        return "", ""

    # Translate to English.
    # NOTE(review): IndicTrans2 tokenizers appear to expect source/target
    # language tags (e.g. "ory_Orya eng_Latn") in front of the text - confirm
    # against the model card before relying on this output.
    inputs = translator_tokenizer(transcription, return_tensors="pt")
    output_tokens = translator.generate(**inputs)
    translated = translator_tokenizer.batch_decode(
        output_tokens, skip_special_tokens=True
    )[0]

    return transcription.strip(), translated.strip()

# Gradio Interface
interface = gr.Interface(
    fn=transcribe_and_translate,
    # type="filepath" asks Gradio to hand the recording/upload to fn as a path.
    inputs=gr.Audio(type="filepath", label="🎀 Record or Upload Odia Audio"),
    # Output order matches the (transcription, translation) tuple fn returns.
    outputs=[
        # Label emoji restored from mis-encoded bytes ("πŸ“"); the trailing
        # byte was lost, presumably the memo emoji - confirm with the author.
        gr.Textbox(label="📝 Odia Transcription"),
        gr.Textbox(label="🌐 English Translation"),
    ],
    title="Odia Whisper ASR + Translator",
    description="Speak in Odia and get instant transcription + English translation using Whisper and IndicTrans2.",
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()