Spaces:

abdibrahem
/

ai-agent

Sleeping

abdibrahem committed on Jun 26

Commit

e019578

1 Parent(s): a781967

Add voice to text feature and update the requirements file

Files changed (3) hide show

main.py CHANGED Viewed

@@ -28,10 +28,13 @@ import asyncio
 # Import endpoints documentation
 from endpoints_documentation import endpoints_documentation
 from cache_manager import CacheManager
 # Set environment variables for HuggingFace
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 class ChatMessage(BaseModel):
@@ -1070,7 +1073,7 @@ class HealthcareChatbot:
 # if __name__ == "__main__":
 #     main()
-from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import Dict, Any, Optional
@@ -1099,6 +1102,20 @@ async def process_query(request: QueryRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.get("/health")
 async def health_check():
     """

 # Import endpoints documentation
 from endpoints_documentation import endpoints_documentation
 from cache_manager import CacheManager
+from voice_util import load_audio
+import whisper
 # Set environment variables for HuggingFace
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
+voice_to_text_model = whisper.load_model("small", device='cpu')  # Whisper "small" model, loaded once at import time on CPU and reused by the /voice-text handler
 class ChatMessage(BaseModel):
 # if __name__ == "__main__":
 #     main()
+from fastapi import FastAPI, HTTPException, UploadFile, File
 from pydantic import BaseModel
 from typing import Dict, Any, Optional
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+@app.post("/voice-text")
+async def process_query(file: UploadFile = File(...)):
+    """
+    Process a user voice and return a response
+    """
+    try:
+        audio_bytes = await file.read()
+        audio_numpy = load_audio(audio_bytes)
+        text_response = voice_to_text_model.transcribe(audio_numpy, fp16=False)
+        response = agent.chat(text_response['text']).message
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
 @app.get("/health")
 async def health_check():
     """

requirements.txt CHANGED Viewed

@@ -80,3 +80,55 @@ uvicorn==0.34.2
 vaderSentiment==3.3.2
 yarl==1.20.0
 zstandard==0.23.0

 vaderSentiment==3.3.2
 yarl==1.20.0
 zstandard==0.23.0
+annotated-types==0.7.0
+anyio==4.9.0
+audioread==3.0.1
+certifi==2025.6.15
+cffi==1.17.1
+charset-normalizer==3.4.2
+click==8.2.1
+decorator==5.2.1
+fastapi==0.115.13
+filelock==3.18.0
+fsspec==2025.5.1
+h11==0.16.0
+idna==3.10
+Jinja2==3.1.6
+joblib==1.5.1
+lazy_loader==0.4
+librosa==0.11.0
+llvmlite==0.44.0
+MarkupSafe==3.0.2
+more-itertools==10.7.0
+mpmath==1.3.0
+msgpack==1.1.1
+networkx==3.5
+numba==0.61.2
+numpy==1.26.4
+openai-whisper @ git+https://github.com/openai/whisper.git@dd985ac4b90cafeef8712f2998d62c59c3e62d22
+packaging==25.0
+platformdirs==4.3.8
+pooch==1.8.2
+pycparser==2.22
+pydantic==2.11.7
+pydantic_core==2.33.2
+python-multipart==0.0.20
+regex==2024.11.6
+requests==2.32.4
+scikit-learn==1.7.0
+scipy==1.16.0
+six==1.17.0
+sniffio==1.3.1
+soundfile==0.13.1
+soxr==0.5.0.post1
+starlette==0.46.2
+sympy==1.14.0
+threadpoolctl==3.6.0
+tiktoken==0.9.0
+torch==2.2.2
+tqdm==4.67.1
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+urllib3==2.5.0
+uvicorn==0.34.3
+whisper==1.1.10

voice_util.py ADDED Viewed

+import whisper
+import numpy as np
+import soundfile as sf
+import io
+from tempfile import NamedTemporaryFile
+import os
+def load_audio(file_bytes):
+    # Load audio and convert to Whisper's required format
+    audio, sr = sf.read(io.BytesIO(file_bytes))
+    # Convert to mono if stereo
+    if len(audio.shape) > 1:
+        audio = np.mean(audio, axis=1)
+    # Resample to 16kHz if needed
+    if sr != 16000:
+        import librosa
+        audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
+    return audio.astype(np.float32)