eienmojiki committed on
Commit
b3bf7c9
·
1 Parent(s): 30451f1

feat: add automatic speech recognition route and integration

Browse files

This commit introduces a new route for automatic speech recognition (ASR) functionality. The route allows users to upload audio files and transcribe them using a specified model from the Hugging Face Hub. The integration enhances the application's capabilities by providing ASR as a new feature.

Files changed (2) hide show
  1. app.py +3 -2
  2. routes/automaticSpeechRecognition.py +23 -0
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from huggingface_hub import InferenceClient
2
  from fastapi import FastAPI
3
- from routes import chatCompletion, textToImage
4
 
5
  app = FastAPI()
6
 
@@ -9,4 +9,5 @@ async def root():
9
  return {"message": "Hello World"}
10
 
11
  app.include_router(chatCompletion.router)
12
- app.include_router(textToImage.router)
 
 
1
  from huggingface_hub import InferenceClient
2
  from fastapi import FastAPI
3
+ from routes import chatCompletion, textToImage, automaticSpeechRecognition
4
 
5
  app = FastAPI()
6
 
 
9
  return {"message": "Hello World"}
10
 
11
  app.include_router(chatCompletion.router)
12
+ app.include_router(textToImage.router)
13
+ app.include_router(automaticSpeechRecognition.router)
routes/automaticSpeechRecognition.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, File, UploadFile, Form
2
+ from huggingface_hub import InferenceClient
3
+
4
+ router = APIRouter()
5
+
6
# Both the model ID and the audio upload arrive as multipart form fields.
@router.post("/v1/audio/transcriptions", tags=["Automatic Speech Recognition"])
async def automatic_speech_recognition(
    model: str = Form(..., description="The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint."),
    audio: UploadFile = File(..., description="The audio file to transcribe, uploaded as multipart form data."),
):
    """Transcribe an uploaded audio file with the requested model.

    Args:
        model: Model ID or Inference Endpoint URL handed to
            ``InferenceClient``. Required form field.
        audio: The uploaded audio file; its full content is read into
            memory and sent to the inference client.

    Returns:
        Whatever ``InferenceClient.automatic_speech_recognition`` returns
        for the uploaded bytes, passed through unchanged.

    Raises:
        HTTPException: 400 if the uploaded file contains no data.
    """
    # Imported locally so this block does not depend on edits to the
    # module-level import lines.
    from fastapi import HTTPException
    from fastapi.concurrency import run_in_threadpool

    audio_bytes = await audio.read()
    if not audio_bytes:
        # Reject empty uploads up front instead of forwarding them upstream.
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    client = InferenceClient(model=model)

    # InferenceClient performs a synchronous network request; running it in
    # the thread pool keeps the event loop free to serve other requests.
    return await run_in_threadpool(
        client.automatic_speech_recognition,
        audio=audio_bytes,
    )