abdibrahem committed
Commit e019578 · 1 Parent(s): a781967

Add voice-to-text feature and update the requirements file

Files changed (3):
  1. main.py +18 -1
  2. requirements.txt +52 -0
  3. voice_util.py +23 -0
main.py CHANGED
@@ -28,10 +28,13 @@ import asyncio
 # Import endpoints documentation
 from endpoints_documentation import endpoints_documentation
 from cache_manager import CacheManager
+from voice_util import load_audio
+import whisper
 
 # Set environment variables for HuggingFace
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
+voice_to_text_model = whisper.load_model("small", device='cpu')
 
 
 class ChatMessage(BaseModel):
@@ -1070,7 +1073,7 @@ class HealthcareChatbot:
 # if __name__ == "__main__":
 #     main()
 
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, UploadFile, File
 from pydantic import BaseModel
 from typing import Dict, Any, Optional
 
@@ -1099,6 +1102,20 @@ async def process_query(request: QueryRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+@app.post("/voice-text")
+async def process_query(file: UploadFile = File(...)):
+    """
+    Process a user voice and return a response
+    """
+    try:
+        audio_bytes = await file.read()
+        audio_numpy = load_audio(audio_bytes)
+        text_response = voice_to_text_model.transcribe(audio_numpy, fp16=False)
+        response = agent.chat(text_response['text']).message
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
 @app.get("/health")
 async def health_check():
     """
requirements.txt CHANGED
@@ -80,3 +80,55 @@ uvicorn==0.34.2
 vaderSentiment==3.3.2
 yarl==1.20.0
 zstandard==0.23.0
+annotated-types==0.7.0
+anyio==4.9.0
+audioread==3.0.1
+certifi==2025.6.15
+cffi==1.17.1
+charset-normalizer==3.4.2
+click==8.2.1
+decorator==5.2.1
+fastapi==0.115.13
+filelock==3.18.0
+fsspec==2025.5.1
+h11==0.16.0
+idna==3.10
+Jinja2==3.1.6
+joblib==1.5.1
+lazy_loader==0.4
+librosa==0.11.0
+llvmlite==0.44.0
+MarkupSafe==3.0.2
+more-itertools==10.7.0
+mpmath==1.3.0
+msgpack==1.1.1
+networkx==3.5
+numba==0.61.2
+numpy==1.26.4
+openai-whisper @ git+https://github.com/openai/whisper.git@dd985ac4b90cafeef8712f2998d62c59c3e62d22
+packaging==25.0
+platformdirs==4.3.8
+pooch==1.8.2
+pycparser==2.22
+pydantic==2.11.7
+pydantic_core==2.33.2
+python-multipart==0.0.20
+regex==2024.11.6
+requests==2.32.4
+scikit-learn==1.7.0
+scipy==1.16.0
+six==1.17.0
+sniffio==1.3.1
+soundfile==0.13.1
+soxr==0.5.0.post1
+starlette==0.46.2
+sympy==1.14.0
+threadpoolctl==3.6.0
+tiktoken==0.9.0
+torch==2.2.2
+tqdm==4.67.1
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+urllib3==2.5.0
+uvicorn==0.34.3
+whisper==1.1.10
voice_util.py ADDED
@@ -0,0 +1,23 @@
+import whisper
+import numpy as np
+import soundfile as sf
+import io
+from tempfile import NamedTemporaryFile
+import os
+
+
+
+def load_audio(file_bytes):
+    # Load audio and convert to Whisper's required format
+    audio, sr = sf.read(io.BytesIO(file_bytes))
+
+    # Convert to mono if stereo
+    if len(audio.shape) > 1:
+        audio = np.mean(audio, axis=1)
+
+    # Resample to 16kHz if needed
+    if sr != 16000:
+        import librosa
+        audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
+
+    return audio.astype(np.float32)
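voice_util.load_audio decodes the uploaded bytes with soundfile, downmixes stereo to mono, resamples to the 16 kHz rate Whisper expects, and returns a float32 NumPy array. A minimal offline sketch of the same pipeline outside the API, assuming the packages from requirements.txt are installed (the file name test.wav is illustrative; the "small" model size mirrors the commit):

# Minimal offline sketch: prepare audio with load_audio and transcribe it with Whisper.
# Assumes openai-whisper, soundfile, and librosa are installed; "test.wav" is illustrative.
import whisper
from voice_util import load_audio

model = whisper.load_model("small", device="cpu")

with open("test.wav", "rb") as f:
    audio = load_audio(f.read())  # mono float32 waveform at 16 kHz

result = model.transcribe(audio, fp16=False)  # fp16=False since inference runs on CPU
print(result["text"])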