ashpikachu2k1 committed on
Commit
3568733
·
verified ·
1 Parent(s): 62908bf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
# Hub identifiers for the checkpoints this app loads at import time.
_ASR_CHECKPOINT = "ashutoshpattnaik50/whisper-small-odia-finetuned"
# The feature extractor/tokenizer comes from the base Whisper repo, not the
# fine-tuned one.
_ASR_PROCESSOR_CHECKPOINT = "openai/whisper-small"
# NOTE(review): confirm this repo id exists on the Hub and whether it needs
# `trust_remote_code=True`; published IndicTrans2 checkpoints appear to use
# longer, direction-specific names — verify before deploying.
_TRANSLATOR_CHECKPOINT = "ai4bharat/indictrans2-en"

# Speech recognition: fine-tuned Whisper weights plus the matching processor.
asr_model = WhisperForConditionalGeneration.from_pretrained(_ASR_CHECKPOINT)
processor = WhisperProcessor.from_pretrained(_ASR_PROCESSOR_CHECKPOINT)

# Translation: seq2seq model and tokenizer are loaded from the same checkpoint.
translator = AutoModelForSeq2SeqLM.from_pretrained(_TRANSLATOR_CHECKPOINT)
translator_tokenizer = AutoTokenizer.from_pretrained(_TRANSLATOR_CHECKPOINT)
12
+
13
+ # Transcription + Translation function
14
def transcribe_and_translate(audio_path):
    """Transcribe an audio file with the ASR model and translate the result.

    Args:
        audio_path: Filesystem path of the recording to process. May be
            ``None`` or empty when no audio was supplied.

    Returns:
        A ``(transcription, translation)`` tuple of whitespace-stripped
        strings. Both are empty strings when no audio was supplied.
    """
    # Nothing to process: return empty outputs instead of letting the audio
    # loader fail on a missing path.
    if not audio_path:
        return "", ""

    # Kept as a function-local import, as in the original, so importing this
    # module does not require torchaudio until audio is actually processed.
    import torchaudio

    waveform, sampling_rate = torchaudio.load(audio_path)

    # The loader yields a (channels, frames) tensor. Average the channels so
    # multi-channel recordings become one mono signal; a plain squeeze() would
    # leave stereo audio 2-D and it would be treated as a batch downstream.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    # The processor below is called with sampling_rate=16000, so resample
    # anything recorded at a different rate.
    if sampling_rate != 16000:
        resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
        waveform = resampler(waveform)

    input_features = processor(
        waveform.numpy(), sampling_rate=16000, return_tensors="pt"
    ).input_features

    predicted_ids = asr_model.generate(input_features)
    transcription = processor.batch_decode(
        predicted_ids, skip_special_tokens=True
    )[0]

    # Translate to English.
    # NOTE(review): IndicTrans2 checkpoints may expect language-tagged,
    # pre-processed input rather than raw text — confirm against the
    # tokenizer that is actually loaded as `translator_tokenizer`.
    inputs = translator_tokenizer(transcription, return_tensors="pt")
    output_tokens = translator.generate(**inputs)
    translated = translator_tokenizer.batch_decode(
        output_tokens, skip_special_tokens=True
    )[0]

    return transcription.strip(), translated.strip()
36
+
37
# Gradio Interface
def _build_interface():
    """Assemble the Gradio UI: one audio input feeding two text outputs."""
    audio_input = gr.Audio(type="filepath", label="🎤 Record or Upload Odia Audio")
    text_outputs = [
        gr.Textbox(label="📝 Odia Transcription"),
        gr.Textbox(label="🌐 English Translation"),
    ]
    return gr.Interface(
        fn=transcribe_and_translate,
        inputs=audio_input,
        outputs=text_outputs,
        title="Odia Whisper ASR + Translator",
        description="Speak in Odia and get instant transcription + English translation using Whisper and IndicTrans2.",
    )


interface = _build_interface()

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()