khalednabawi11 committed on
Commit b058bd9 · verified · 1 Parent(s): 9b4a539

Update app.py

Files changed (1):
  1. app.py +114 -276
app.py CHANGED
@@ -1,253 +1,50 @@
- # import torch
- # import asyncio
- # import logging
- # import signal
- # import uvicorn
- # import os
-
- # from fastapi import FastAPI, Request, HTTPException, status
- # from pydantic import BaseModel, Field
- # from langdetect import detect
-
- # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, GenerationConfig
- # from langchain.vectorstores import Qdrant
- # from langchain.embeddings import HuggingFaceEmbeddings
- # from langchain.chains import RetrievalQA
- # from langchain.llms import HuggingFacePipeline
- # from qdrant_client import QdrantClient
- # from langchain.callbacks.base import BaseCallbackHandler
- # from huggingface_hub import hf_hub_download
- # from contextlib import asynccontextmanager
-
- # # Get environment variables
- # QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
- # QDRANT_URL = os.getenv("QDRANT_URL")
- # COLLECTION_NAME = "arabic_rag_collection"
- # QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
- # QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.Jb39rYQW2rSE9RdXrjdzKY6T1RF44XjdQzCvzFkjat4")
-
- # # === LOGGING === #
- # logging.basicConfig(level=logging.DEBUG)
- # logger = logging.getLogger(__name__)
-
- # # Load model and tokenizer
- # model_name = "FreedomIntelligence/Apollo-7B"
- # tokenizer = AutoTokenizer.from_pretrained(model_name)
- # model = AutoModelForCausalLM.from_pretrained(model_name)
- # tokenizer.pad_token = tokenizer.eos_token
-
- # # Connect to Qdrant + embedding
- # embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
- # qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
-
- # vector_store = Qdrant(
- #     client=qdrant_client,
- #     collection_name=COLLECTION_NAME,
- #     embeddings=embedding
- # )
-
- # # Generation settings
- # generation_config = GenerationConfig(
- #     max_new_tokens=150,
- #     temperature=0.2,
- #     top_k=20,
- #     do_sample=True,
- #     top_p=0.7,
- #     repetition_penalty=1.3,
- # )
-
- # # Text generation pipeline
- # llm_pipeline = pipeline(
- #     model=model,
- #     tokenizer=tokenizer,
- #     task="text-generation",
- #     generation_config=generation_config,
- #     device=model.device.index if model.device.type == "cuda" else -1
- # )
-
- # llm = HuggingFacePipeline(pipeline=llm_pipeline)
-
- # retriever = vector_store.as_retriever(search_kwargs={"k": 3})
-
- # # Set up RAG QA chain
- # qa_chain = RetrievalQA.from_chain_type(
- #     llm=llm,
- #     retriever=retriever,
- #     chain_type="stuff"
- # )
-
- # # FastAPI setup
- # app = FastAPI(title="Apollo RAG Medical Chatbot")
-
- # class Query(BaseModel):
- #     question: str = Field(..., example="ما هي اسباب تساقط الشعر ؟", min_length=3)
-
- # class TimeoutCallback(BaseCallbackHandler):
- #     def __init__(self, timeout_seconds: int = 60):
- #         self.timeout_seconds = timeout_seconds
- #         self.start_time = None
-
- #     async def on_llm_start(self, *args, **kwargs):
- #         self.start_time = asyncio.get_event_loop().time()
-
- #     async def on_llm_new_token(self, *args, **kwargs):
- #         if asyncio.get_event_loop().time() - self.start_time > self.timeout_seconds:
- #             raise TimeoutError("LLM processing timeout")
-
- # # Prompt template
- # # def generate_prompt(question: str) -> str:
- # #     lang = detect(question)
- # #     if lang == "ar":
- # #         return f"""أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة.
- # #         وتأكد من ان:
- # #         - عدم تكرار أي نقطة أو عبارة أو كلمة
- # #         - وضوح وسلاسة كل نقطة
- # #         - تجنب الحشو والعبارات الزائدة
- # #         السؤال: {question}
- # #         الإجابة:"""
- # #     else:
- # #         return f"""Answer the following medical question in clear English with a detailed, non-redundant response. Do not repeat ideas or restate the question. If the context lacks information, rely on prior medical knowledge.
- # #         Question: {question}
- # #         Answer:"""
-
-
- # def generate_prompt(question: str) -> str:
- #     lang = detect(question)
- #     if lang == "ar":
- #         return (
- #             "أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة. \n"
- #             "- عدم تكرار أي نقطة أو عبارة أو كلمة\n"
- #             "- وضوح وسلاسة كل نقطة\n"
- #             "- تجنب الحشو والعبارات الزائدة\n"
- #             f"\nالسؤال: {question}\nالإجابة:"
- #         )
- #     else:
- #         return (
- #             "Answer the following medical question in clear English with a detailed, non-redundant response. "
- #             "Do not repeat ideas, phrases, or restate the question in the answer. If the context lacks relevant "
- #             "information, rely on prior medical knowledge. If the answer involves multiple points, list them "
- #             "in concise and distinct bullet points:\n"
- #             f"Question: {question}\nAnswer:"
- #         )
-
- # # Input schema
- # # class ChatRequest(BaseModel):
- # #     message: str
-
- # # # Output endpoint
- # # @app.post("/chat")
- # # def chat_rag(req: ChatRequest):
- # #     prompt = generate_prompt(req.message)
- # #     response = qa_chain.run(prompt)
- # #     return {"response": response}
-
-
- # # === ROUTES === #
- # @app.get("/")
- # async def root():
- #     return {"message": "Medical QA API is running!"}
-
- # @app.post("/ask")
- # async def ask(query: Query):
- #     try:
- #         logger.debug(f"Received question: {query.question}")
- #         prompt = generate_prompt(query.question)
- #         timeout_callback = TimeoutCallback(timeout_seconds=60)
-
-
- #         # docs = retriever.get_relevant_documents(query.question)
- #         # if not docs:
- #         #     logger.warning("No documents retrieved from Qdrant for the question.")
- #         # else:
- #         #     logger.debug(f"Retrieved documents: {[doc.page_content for doc in docs[:1]]}")
-
- #         loop = asyncio.get_event_loop()
-
- #         answer = await asyncio.wait_for(
- #             # qa_chain.run(prompt, callbacks=[timeout_callback]),
- #             loop.run_in_executor(None, qa_chain.run, query.question),
- #             timeout=360
- #         )
-
- #         if not answer:
- #             raise ValueError("Empty answer returned from model")
-
- #         if 'Answer:' in answer:
- #             response_text = answer.split('Answer:')[-1].strip()
- #         elif 'الإجابة:' in answer:
- #             response_text = answer.split('الإجابة:')[-1].strip()
- #         else:
- #             response_text = answer.strip()
-
-
- #         return {
- #             "status": "success",
- #             "answer": answer,
- #             "response": response_text,
- #             "language": detect(query.question)
- #         }
-
- #     except TimeoutError as te:
- #         logger.error("Request timed out", exc_info=True)
- #         raise HTTPException(
- #             status_code=status.HTTP_504_GATEWAY_TIMEOUT,
- #             detail={"status": "error", "message": "Request timed out", "error": str(te)}
- #         )
-
- #     except Exception as e:
- #         logger.error(f"Unexpected error: {e}", exc_info=True)
- #         raise HTTPException(
- #             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
- #             detail={"status": "error", "message": "Internal server error", "error": str(e)}
- #         )
-
- # # === ENTRYPOINT === #
- # if __name__ == "__main__":
- #     def handle_exit(signum, frame):
- #         print("Shutting down gracefully...")
- #         exit(0)
-
- #     signal.signal(signal.SIGINT, handle_exit)
- #     import uvicorn
- #     uvicorn.run(app, host="0.0.0.0", port=8000)
-
-
-
  import torch
  import asyncio
  import logging
  import signal
  import uvicorn
- import os
+ import os
 
  from fastapi import FastAPI, Request, HTTPException, status
  from pydantic import BaseModel, Field
  from langdetect import detect
 
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, GenerationConfig
- from langchain_community.vectorstores import Qdrant
+ from langchain.vectorstores import Qdrant
  from langchain.embeddings import HuggingFaceEmbeddings
- from langchain_community.llms import HuggingFacePipeline
+ from langchain.chains import RetrievalQA
+ from langchain.llms import HuggingFacePipeline
  from qdrant_client import QdrantClient
- from langchain_core.runnables import RunnableMap
+ from langchain.callbacks.base import BaseCallbackHandler
  from huggingface_hub import hf_hub_download
+ from contextlib import asynccontextmanager
 
- # === ENVIRONMENT SETUP === #
- QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "your_fallback_api_key")
- QDRANT_URL = os.getenv("QDRANT_URL", "your_fallback_qdrant_url")
+ # Get environment variables
  COLLECTION_NAME = "arabic_rag_collection"
+ QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.Jb39rYQW2rSE9RdXrjdzKY6T1RF44XjdQzCvzFkjat4")
 
  # === LOGGING === #
- logging.basicConfig(level=logging.INFO)
+ logging.basicConfig(level=logging.DEBUG)
  logger = logging.getLogger(__name__)
 
- # === MODEL SETUP === #
+ # Load model and tokenizer
  model_name = "FreedomIntelligence/Apollo-7B"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(model_name)
  tokenizer.pad_token = tokenizer.eos_token
 
- # === GENERATION CONFIG === #
+ # Connect to Qdrant + embedding
+ embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
+ qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
+
+ vector_store = Qdrant(
+     client=qdrant_client,
+     collection_name=COLLECTION_NAME,
+     embeddings=embedding
+ )
+
+ # Generation settings
  generation_config = GenerationConfig(
      max_new_tokens=150,
      temperature=0.2,
@@ -257,6 +54,7 @@ generation_config = GenerationConfig(
      repetition_penalty=1.3,
  )
 
+ # Text generation pipeline
  llm_pipeline = pipeline(
      model=model,
      tokenizer=tokenizer,
@@ -264,55 +62,75 @@ llm_pipeline = pipeline(
      generation_config=generation_config,
      device=model.device.index if model.device.type == "cuda" else -1
  )
- llm = HuggingFacePipeline(pipeline=llm_pipeline)
 
- # === EMBEDDING + VECTOR STORE === #
- embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
- qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
+ llm = HuggingFacePipeline(pipeline=llm_pipeline)
 
- vector_store = Qdrant(
-     client=qdrant_client,
-     collection_name=COLLECTION_NAME,
-     embeddings=embedding
- )
  retriever = vector_store.as_retriever(search_kwargs={"k": 3})
 
- # === PROMPT FUNCTION === #
- def generate_prompt(question: str) -> str:
-     lang = detect(question)
-     if lang == "ar":
-         return (
-             "أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة. \n"
-             "- عدم تكرار أي نقطة أو عبارة أو كلمة\n"
-             "- وضوح وسلاسة كل نقطة\n"
-             "- تجنب الحشو والعبارات الزائدة\n"
-             f"\nالسؤال: {question}\nالإجابة:"
-         )
-     else:
-         return (
-             "Answer the following medical question in clear English with a detailed, non-redundant response. "
-             "Do not repeat ideas, phrases, or restate the question. If the context lacks relevant "
-             "information, rely on prior medical knowledge.\n"
-             f"Question: {question}\nAnswer:"
-         )
+ # Set up RAG QA chain
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=llm,
+     retriever=retriever,
+     chain_type="stuff"
+ )
 
- # === FASTAPI SETUP === #
+ # FastAPI setup
  app = FastAPI(title="Apollo RAG Medical Chatbot")
 
  class Query(BaseModel):
      question: str = Field(..., example="ما هي اسباب تساقط الشعر ؟", min_length=3)
 
- # === RAG PIPELINE === #
- async def async_chain(question: str):
-     prompt = generate_prompt(question)
-     docs = await retriever.aget_relevant_documents(question)
-     if not docs:
-         logger.warning("No relevant documents found in Qdrant.")
-     context = "\n".join([doc.page_content for doc in docs])
-     full_prompt = f"{context}\n\n{prompt}"
-     logger.debug(f"Prompt: {full_prompt}")
-     response = llm.invoke(full_prompt)
-     return response
+ class TimeoutCallback(BaseCallbackHandler):
+     def __init__(self, timeout_seconds: int = 60):
+         self.timeout_seconds = timeout_seconds
+         self.start_time = None
+
+     async def on_llm_start(self, *args, **kwargs):
+         self.start_time = asyncio.get_event_loop().time()
+
+     async def on_llm_new_token(self, *args, **kwargs):
+         if asyncio.get_event_loop().time() - self.start_time > self.timeout_seconds:
+             raise TimeoutError("LLM processing timeout")
+
+
+ # def generate_prompt(question: str) -> str:
+ #     lang = detect(question)
+ #     if lang == "ar":
+ #         return (
+ #             "أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة. \n"
+ #             "- عدم تكرار أي نقطة أو عبارة أو كلمة\n"
+ #             "- وضوح وسلاسة كل نقطة\n"
+ #             "- تجنب الحشو والعبارات الزائدة\n"
+ #             f"\nالسؤال: {question}\nالإجابة:"
+ #         )
+ #     else:
+ #         return (
+ #             "Answer the following medical question in clear English with a detailed, non-redundant response. "
+ #             "Do not repeat ideas, phrases, or restate the question in the answer. If the context lacks relevant "
+ #             "information, rely on your prior medical knowledge. If the answer involves multiple points, list them "
+ #             "in concise and distinct bullet points:\n"
+ #             f"Question: {question}\nAnswer:"
+ #         )
+
+ from langdetect import detect
+
+ def generate_prompt(question):
+     lang = detect(question)
+     if lang == "ar":
+         return f"""أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة.
+ وتأكد من ان:
+ - عدم تكرار أي نقطة أو عبارة أو كلمة
+ - وضوح وسلاسة كل نقطة
+ - تجنب الحشو والعبارات الزائدة-
+
+ السؤال: {question}
+ الإجابة:
+ """
+
+     else:
+         return f"""Answer the following medical question in clear English with a detailed, non-redundant response. Do not repeat ideas, phrases, or restate the question in the answer. If the context lacks relevant information, rely on your prior medical knowledge. If the answer involves multiple points, list them in concise and distinct bullet points:
+ Question: {question}
+ Answer:"""
 
  # === ROUTES === #
  @app.get("/")
@@ -322,34 +140,54 @@ async def root():
  @app.post("/ask")
  async def ask(query: Query):
      try:
-         response = await asyncio.wait_for(async_chain(query.question), timeout=60)
+         logger.debug(f"Received question: {query.question}")
+         prompt = generate_prompt(query.question)
+         timeout_callback = TimeoutCallback(timeout_seconds=60)
+
+         # docs = retriever.get_relevant_documents(query.question)
+         # if not docs:
+         #     logger.warning("No documents retrieved from Qdrant for the question.")
+         # else:
+         #     logger.debug(f"Retrieved documents: {[doc.page_content for doc in docs[:1]]}")
 
-         if 'Answer:' in response:
-             response_text = response.split('Answer:')[-1].strip()
-         elif 'الإجابة:' in response:
-             response_text = response.split('الإجابة:')[-1].strip()
+         loop = asyncio.get_event_loop()
+
+         answer = await asyncio.wait_for(
+             # qa_chain.run(prompt, callbacks=[timeout_callback]),
+             loop.run_in_executor(None, qa_chain.run, query.question),
+             timeout=360
+         )
+
+         if not answer:
+             raise ValueError("Empty answer returned from model")
+
+         if 'Answer:' in answer:
+             response_text = answer.split('Answer:')[-1].strip()
+         elif 'الإجابة:' in answer:
+             response_text = answer.split('الإجابة:')[-1].strip()
          else:
-             response_text = response.strip()
+             response_text = answer.strip()
 
+
          return {
              "status": "success",
+             "answer": answer,
              "response": response_text,
              "language": detect(query.question)
          }
 
-     except asyncio.TimeoutError:
-         logger.error("Request timed out")
+     except TimeoutError as te:
+         logger.error("Request timed out", exc_info=True)
          raise HTTPException(
              status_code=status.HTTP_504_GATEWAY_TIMEOUT,
-             detail="Request timed out"
+             detail={"status": "error", "message": "Request timed out", "error": str(te)}
          )
 
      except Exception as e:
          logger.error(f"Unexpected error: {e}", exc_info=True)
          raise HTTPException(
              status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail=f"Internal server error: {e}"
+             detail={"status": "error", "message": "Internal server error", "error": str(e)}
          )
 
  # === ENTRYPOINT === #
@@ -359,6 +197,6 @@ if __name__ == "__main__":
          exit(0)
 
      signal.signal(signal.SIGINT, handle_exit)
+     import uvicorn
      uvicorn.run(app, host="0.0.0.0", port=8000)
 
-
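Note on the pattern this commit introduces: the rewritten /ask handler pushes the blocking qa_chain.run call onto a worker thread and bounds the wait with asyncio.wait_for, rather than relying on the (now unused) TimeoutCallback. A minimal, self-contained sketch of that executor-plus-timeout pattern, with a hypothetical slow_chain standing in for qa_chain.run:

import asyncio
import time

def slow_chain(question: str) -> str:
    # Stand-in for qa_chain.run: a blocking call that may take a while.
    time.sleep(2)
    return f"Answer: (demo) {question}"

async def ask_with_timeout(question: str, timeout: float = 360.0) -> str:
    # get_running_loop() is the modern spelling of the handler's
    # asyncio.get_event_loop() lookup inside a coroutine.
    loop = asyncio.get_running_loop()
    # run_in_executor keeps the event loop responsive while the chain runs
    # in the default thread pool; wait_for bounds how long we wait for it.
    return await asyncio.wait_for(
        loop.run_in_executor(None, slow_chain, question),
        timeout=timeout,
    )

print(asyncio.run(ask_with_timeout("test question", timeout=5.0)))

One caveat worth knowing: on timeout, wait_for cancels the wait, not the worker thread, so the underlying generation keeps running in the background until qa_chain.run returns.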
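For completeness, a hypothetical client call against a locally running instance (the entrypoint binds uvicorn to 0.0.0.0:8000); per the handler above, the JSON body carries status, the raw answer, the stripped response, and the detected language:

import requests  # third-party HTTP client, assumed installed

resp = requests.post(
    "http://localhost:8000/ask",
    json={"question": "ما هي اسباب تساقط الشعر ؟"},  # example from the Query schema
    timeout=400,  # generation is bounded server-side at 360 s
)
resp.raise_for_status()
data = resp.json()
print(data["status"], data["language"])
print(data["response"])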