Spaces:
Running
Running
Commit
·
b3bf7c9
1
Parent(s):
30451f1
feat: add automatic speech recognition route and integration
Browse files

This commit introduces a new route for automatic speech recognition (ASR) functionality. The route allows users to upload audio files and transcribe them using a specified model from the Hugging Face Hub. The integration enhances the application's capabilities by providing ASR as a new feature.
- app.py +3 -2
- routes/automaticSpeechRecognition.py +23 -0
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from huggingface_hub import InferenceClient
|
2 |
from fastapi import FastAPI
|
3 |
-
from routes import chatCompletion, textToImage
|
4 |
|
5 |
app = FastAPI()
|
6 |
|
@@ -9,4 +9,5 @@ async def root():
|
|
9 |
return {"message": "Hello World"}
|
10 |
|
11 |
app.include_router(chatCompletion.router)
|
12 |
-
app.include_router(textToImage.router)
|
|
|
|
1 |
from huggingface_hub import InferenceClient
|
2 |
from fastapi import FastAPI
|
3 |
+
from routes import chatCompletion, textToImage, automaticSpeechRecognition
|
4 |
|
5 |
app = FastAPI()
|
6 |
|
|
|
9 |
return {"message": "Hello World"}
|
10 |
|
11 |
app.include_router(chatCompletion.router)
|
12 |
+
app.include_router(textToImage.router)
|
13 |
+
app.include_router(automaticSpeechRecognition.router)
|
routes/automaticSpeechRecognition.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Optional

from fastapi import APIRouter, File, Form, UploadFile
from huggingface_hub import InferenceClient

router = APIRouter()


@router.post("/v1/audio/transcriptions", tags=["Automatic Speech Recognition"])
async def automatic_speech_recognition(
    # Optional with a None default: the description promises a fallback to the
    # recommended ASR model, so the field must not be required (the original
    # `Form(...)` made it mandatory, contradicting its own description).
    model: Optional[str] = Form(
        None,
        description=(
            "The model to use for ASR. Can be a model ID hosted on the "
            "Hugging Face Hub or a URL to a deployed Inference Endpoint. "
            "If not provided, the default recommended model for ASR will "
            "be used."
        ),
    ),
    # Description corrected: this endpoint only accepts a multipart file
    # upload, not raw bytes, local paths, or URLs.
    audio: UploadFile = File(..., description="The audio file to transcribe."),
):
    """Transcribe an uploaded audio file via the Hugging Face Inference API.

    Reads the uploaded audio into memory and forwards the raw bytes to
    ``InferenceClient.automatic_speech_recognition``. Returns the client's
    transcription result (serialized by FastAPI as the response body).
    """
    # InferenceClient accepts model=None and resolves the task's recommended
    # model, which implements the documented fallback behavior.
    client = InferenceClient(model=model)

    # Read the whole upload into memory; the client method accepts raw
    # audio bytes as content.
    audio_bytes = await audio.read()

    res = client.automatic_speech_recognition(audio=audio_bytes)
    return res