Spaces:

thecozietower
/

llm

Running

eienmojiki committed on Apr 27

Commit

b3bf7c9

1 Parent(s): 30451f1

feat: add automatic speech recognition route and integration

This commit introduces a new route for automatic speech recognition (ASR) functionality. The route allows users to upload audio files and transcribe them using a specified model from the Hugging Face Hub. The integration enhances the application's capabilities by providing ASR as a new feature.

Files changed (2) hide show

app.py +3 -2
routes/automaticSpeechRecognition.py +23 -0

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from huggingface_hub import InferenceClient
 from fastapi import FastAPI
-from routes import chatCompletion, textToImage
 app = FastAPI()
@@ -9,4 +9,5 @@ async def root():
     return {"message": "Hello World"}
 app.include_router(chatCompletion.router)
-app.include_router(textToImage.router)

 from huggingface_hub import InferenceClient
 from fastapi import FastAPI
+from routes import chatCompletion, textToImage, automaticSpeechRecognition
 app = FastAPI()
     return {"message": "Hello World"}
 app.include_router(chatCompletion.router)
+app.include_router(textToImage.router)
+app.include_router(automaticSpeechRecognition.router)

routes/automaticSpeechRecognition.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from fastapi import APIRouter, File, UploadFile, Form
+from huggingface_hub import InferenceClient
+router = APIRouter()
+@router.post("/v1/audio/transcriptions", tags=["Automatic Speech Recognition"])
+async def automatic_speech_recognition(
+    # `Form(...)` makes this field required, so the description must not
+    # advertise a fallback to a default model.
+    model: str = Form(..., description="The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint."),
+    # `UploadFile` only accepts a multipart file upload; URLs and local paths
+    # are not handled by this route.
+    audio: UploadFile = File(..., description="The audio file to transcribe, uploaded as multipart form data."),
+):
+    """Transcribe an uploaded audio file with the requested ASR model.
+
+    - **model**: model ID or Inference Endpoint URL passed to `InferenceClient`.
+    - **audio**: the uploaded audio file; its full content is read into memory.
+
+    Returns whatever `InferenceClient.automatic_speech_recognition` returns.
+    """
+    # Local import keeps this block self-contained; asyncio.to_thread needs
+    # Python 3.9+.
+    import asyncio
+
+    # A client is built per request because the model comes from the form data.
+    client = InferenceClient(model=model)
+    # Read the whole upload; the inference call expects the raw bytes.
+    audio_bytes = await audio.read()
+    # InferenceClient is the synchronous client: calling it directly inside an
+    # `async def` handler would block the event loop for the whole duration of
+    # the remote inference, so run it in a worker thread instead.
+    res = await asyncio.to_thread(
+        client.automatic_speech_recognition,
+        audio=audio_bytes,
+    )
+    return res