Spaces:

hareballak
/

Ponni

Sleeping

App Files Community

hareballak committed on Apr 28

Commit

fbaa611

verified ·

1 Parent(s): 34a23c4

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -10

app.py CHANGED Viewed

@@ -73,30 +73,48 @@ def get_bot_response(query):
     best_idx = torch.argmax(scores)
     top_qa = qa_data[best_idx]
-    prompt = f"""நீ ஒரு அறிவாளியான தமிழ் உதவியாளர்.
     தகவல்கள்:
     கேள்வி: {top_qa['question']}
     பதில்: {top_qa['answer']}
     மேலே உள்ள தகவல்களைப் பயன்படுத்தி, தெளிவான மற்றும் சுருக்கமான பதிலை வழங்கவும்.
     உயர்கட்ட கேள்வி: {query}
     பதில்:"""
-    # Use LLaMA for generating the refined response
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
     payload = {
         "inputs": prompt,
-        "parameters": {"temperature": 0.7, "max_new_tokens": 150, "return_full_text": False},
     }
     response = requests.post(LLAMA_API_URL, headers=headers, json=payload)
-    result = response.json()
-    if isinstance(result, list) and "generated_text" in result[0]:
-        return result[0]["generated_text"]
-    else:
-        return "மன்னிக்கவும், நான் இந்த கேள்விக்கு பதில் தர முடியவில்லை."
 # Gradio interface function
 def chatbot(audio, message, system_message, max_tokens, temperature, top_p):

     best_idx = torch.argmax(scores)
     top_qa = qa_data[best_idx]
+    prompt = f"""நீ ஒரு அறிவாளியான தமிழ் உதவியாளர்.
     தகவல்கள்:
     கேள்வி: {top_qa['question']}
     பதில்: {top_qa['answer']}
     மேலே உள்ள தகவல்களைப் பயன்படுத்தி, தெளிவான மற்றும் சுருக்கமான பதிலை வழங்கவும்.
     உயர்கட்ட கேள்வி: {query}
     பதில்:"""
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
     payload = {
         "inputs": prompt,
+        "parameters": {
+            "temperature": 0.7,
+            "max_new_tokens": 150,
+            "return_full_text": False
+        },
     }
+    # Post request
     response = requests.post(LLAMA_API_URL, headers=headers, json=payload)
+    # Sometimes inference is slow ➔ Wait for result
+    start_time = time.time()
+    max_wait_seconds = 180  # 💬 wait up to 3 minutes if necessary
+    while True:
+        try:
+            result = response.json()
+            if isinstance(result, list) and "generated_text" in result[0]:
+                return result[0]["generated_text"]
+            elif "error" in result and "loading" in result["error"].lower():
+                print("⏳ Model is loading, waiting 10 seconds...")
+                time.sleep(10)
+            else:
+                return "மன்னிக்கவும், நான் இந்த கேள்விக்கு பதில் தர முடியவில்லை."
+        except Exception as e:
+            if time.time() - start_time > max_wait_seconds:
+                return f"Error: Timeout while waiting for model prediction after {max_wait_seconds} seconds."
+            print(f"Waiting for model to respond... {str(e)}")
+            time.sleep(5)  # wait 5 seconds before retry
 # Gradio interface function
 def chatbot(audio, message, system_message, max_tokens, temperature, top_p):