Spaces:
Running
on
Zero
Running
on
Zero
| import os | |
| import gradio as gr | |
| import spaces | |
| from transformers import pipeline, Pipeline | |
| is_hf_space = os.getenv("IS_HF_SPACE") | |
| model_ids = [ | |
| "", | |
| "mozilla-ai/whisper-small-gl (Galician)", | |
| "mozilla-ai/whisper-small-el (Greek)", | |
| "openai/whisper-tiny (Multilingual)", | |
| "openai/whisper-small (Multilingual)", | |
| "openai/whisper-medium (Multilingual)", | |
| "openai/whisper-large-v3 (Multilingual)", | |
| "openai/whisper-large-v3-turbo (Multilingual)", | |
| ] | |
| def _load_local_model(model_dir: str) -> Pipeline: | |
| from transformers import ( | |
| WhisperProcessor, | |
| WhisperTokenizer, | |
| WhisperFeatureExtractor, | |
| WhisperForConditionalGeneration, | |
| ) | |
| processor = WhisperProcessor.from_pretrained(model_dir) | |
| tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe") | |
| feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir) | |
| model = WhisperForConditionalGeneration.from_pretrained(model_dir) | |
| try: | |
| return pipeline( | |
| task="automatic-speech-recognition", | |
| model=model, | |
| processor=processor, | |
| tokenizer=tokenizer, | |
| feature_extractor=feature_extractor, | |
| ) | |
| except Exception as e: | |
| return str(e) | |
| def _load_hf_model(model_repo_id: str) -> Pipeline: | |
| try: | |
| return pipeline( | |
| "automatic-speech-recognition", | |
| model=model_repo_id, | |
| ) | |
| except Exception as e: | |
| return str(e) | |
| def transcribe( | |
| dropdown_model_id: str, | |
| hf_model_id: str, | |
| local_model_id: str, | |
| audio: gr.Audio, | |
| ) -> str: | |
| if dropdown_model_id and not hf_model_id and not local_model_id: | |
| dropdown_model_id = dropdown_model_id.split(" (")[0] | |
| pipe = _load_hf_model(dropdown_model_id) | |
| elif hf_model_id and not local_model_id and not dropdown_model_id: | |
| pipe = _load_hf_model(hf_model_id) | |
| elif local_model_id and not hf_model_id and not dropdown_model_id: | |
| pipe = _load_local_model(local_model_id) | |
| else: | |
| return ( | |
| "⚠️ Error: Please select or fill at least and only one of the options above" | |
| ) | |
| if isinstance(pipe, str): | |
| # Exception raised when loading | |
| return f"⚠️ Error: {pipe}" | |
| text = pipe(audio)["text"] | |
| return text | |
| def setup_gradio_demo(): | |
| with gr.Blocks() as demo: | |
| gr.Markdown( | |
| """ # 🗣️ Speech-to-Text Transcription | |
| ### 1. Select which model to use from one of the options below. | |
| ### 2. Record a message or upload an audio file. | |
| ### 3. Click Transcribe to see the transcription generated by the model. | |
| """ | |
| ) | |
| ### Model selection ### | |
| with gr.Row(): | |
| with gr.Column(): | |
| dropdown_model = gr.Dropdown( | |
| choices=model_ids, label="Option 1: Select a model" | |
| ) | |
| with gr.Column(): | |
| user_model = gr.Textbox( | |
| label="Option 2: Paste HF model id", | |
| placeholder="my-username/my-whisper-tiny", | |
| ) | |
| with gr.Column(visible=not is_hf_space): | |
| local_model = gr.Textbox( | |
| label="Option 3: Paste local path to model directory", | |
| placeholder="artifacts/my-whisper-tiny", | |
| ) | |
| ### Transcription ### | |
| audio_input = gr.Audio( | |
| sources=["microphone", "upload"], | |
| type="filepath", | |
| label="Record a message / Upload audio file", | |
| show_download_button=True, | |
| max_length=30, | |
| ) | |
| transcribe_button = gr.Button("Transcribe") | |
| transcribe_output = gr.Text(label="Output") | |
| transcribe_button.click( | |
| fn=transcribe, | |
| inputs=[dropdown_model, user_model, local_model, audio_input], | |
| outputs=transcribe_output, | |
| ) | |
| demo.launch() | |
| if __name__ == "__main__": | |
| setup_gradio_demo() | |