Update handler.py
Browse files- handler.py +14 -0
handler.py
CHANGED
@@ -64,6 +64,8 @@ class EndpointHandler:
|
|
64 |
self.padding_token = 128263
|
65 |
self.start_audio_token = 128257 # Start of Audio token
|
66 |
self.end_audio_token = 128258 # End of Audio token
|
|
|
|
|
67 |
|
68 |
logger.info("Handler initialization complete")
|
69 |
|
@@ -268,6 +270,10 @@ class EndpointHandler:
|
|
268 |
"""
|
269 |
Main entry point for the handler
|
270 |
"""
|
|
|
|
|
|
|
|
|
271 |
try:
|
272 |
logger.info(f"Received request: {type(data)}")
|
273 |
|
@@ -286,3 +292,11 @@ class EndpointHandler:
|
|
286 |
logger.error(traceback.format_exc())
|
287 |
return {"error": str(e)}
|
288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
self.padding_token = 128263
|
65 |
self.start_audio_token = 128257 # Start of Audio token
|
66 |
self.end_audio_token = 128258 # End of Audio token
|
67 |
+
|
68 |
+
self._warmed_up = False
|
69 |
|
70 |
logger.info("Handler initialization complete")
|
71 |
|
|
|
270 |
"""
|
271 |
Main entry point for the handler
|
272 |
"""
|
273 |
+
# Run warmup only once, the first time __call__ is triggered
|
274 |
+
if not self._warmed_up:
|
275 |
+
self._warmup()
|
276 |
+
|
277 |
try:
|
278 |
logger.info(f"Received request: {type(data)}")
|
279 |
|
|
|
292 |
logger.error(traceback.format_exc())
|
293 |
return {"error": str(e)}
|
294 |
|
295 |
+
def _warmup(self):
    """Run one tiny throwaway generation so later requests hit a warm model.

    Tokenizes a fixed dummy prompt with ``self.tokenizer``, moves the
    resulting ``input_ids`` to ``self.device`` and asks ``self.model`` to
    generate at most 5 new tokens. The generated output is discarded.

    The warmup is best-effort: any exception is logged and swallowed so
    that a warmup problem never fails the request that triggered it.

    Side effects:
        Sets ``self._warmed_up`` to ``True`` whether or not the generation
        succeeds.
    """
    # Record the attempt *before* running it. ``__call__`` invokes this
    # method whenever ``_warmed_up`` is falsy, so leaving the flag unset on
    # failure would re-run a failing warmup on every single request.
    self._warmed_up = True
    try:
        dummy_prompt = "tara: Hello"
        encoded = self.tokenizer(dummy_prompt, return_tensors="pt")
        input_ids = encoded.input_ids.to(self.device)
        # Only the side effect of running the model matters; drop the output.
        self.model.generate(input_ids=input_ids, max_new_tokens=5)
    except Exception:
        # Use the module logger (as the rest of the handler does) rather
        # than print, and keep the traceback for diagnosis.
        logger.exception("[WARMUP ERROR] warmup generation failed")
|