Spaces:

ShenghaoYummy
/

AI-chatbot

Sleeping

ShenghaoYummy committed on May 24

Commit

4287d7f

verified ·

1 Parent(s): d8c1d71

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,15 +24,10 @@ def generate(message, history):
     """
     # rebuild a single prompt string from history + current message
     prompt = ""
-    # Add conversation history
     for user_msg, assistant_msg in history:
         prompt += f"User: {user_msg}\n"
         prompt += f"Assistant: {assistant_msg}\n"
-    # Add current user message
-    prompt += f"User: {message}\n"
-    prompt += "Assistant:"
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
@@ -42,17 +37,18 @@ def generate(message, history):
         temperature=0.7,
     )
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # strip everything before the last "Assistant:"
     reply = text.split("Assistant:")[-1].strip()
     return reply
-# 3) build Gradio ChatInterface
-demo = gr.ChatInterface(
-    fn=generate,
-    title="TinyLlama-1.1B Chat API",
-    description="Chat with TinyLlama-1.1B and call via /api/predict",
-    type="messages",
 )
 # 4) launch

     """
     # rebuild a single prompt string from history + current message
     prompt = ""
     for user_msg, assistant_msg in history:
         prompt += f"User: {user_msg}\n"
         prompt += f"Assistant: {assistant_msg}\n"
+    prompt += f"User: {message}\nAssistant:"
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         temperature=0.7,
     )
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     reply = text.split("Assistant:")[-1].strip()
     return reply
+# 3) build Gradio ChatInterface *with open_routes enabled*
+demo = (
+    gr.ChatInterface(
+        fn=generate,
+        title="TinyLlama-1.1B Chat API",
+        description="Chat with TinyLlama-1.1B and call via /api/predict",
+        type="messages",
+    )
+    .queue(open_routes=True)   # ← allow direct HTTP POST to /api/predict
 )
 # 4) launch