ShenghaoYummy committed
Commit 35d9d45 · verified · 1 Parent(s): 37550b7

Update app.py

Files changed (1): app.py +100 -32
app.py CHANGED
@@ -1,56 +1,124 @@
  import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import gradio as gr
  import os

- MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

- # 1) load model & tokenizer
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_ID,
-     load_in_4bit=True,  # comment out to use full precision
-     torch_dtype=torch.float16,
-     device_map="auto",
-     trust_remote_code=True,
- )

- # 2) define inference function
  def generate(message, history):
      """
      message: Current user message (string)
      history: List of [user_message, assistant_message] pairs
      returns: assistant's reply (string)
      """
-     # rebuild a single prompt string from history + current message
-     prompt = ""
      for user_msg, assistant_msg in history:
-         prompt += f"User: {user_msg}\n"
-         prompt += f"Assistant: {assistant_msg}\n"
-     prompt += f"User: {message}\nAssistant:"
-
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-     outputs = model.generate(
-         **inputs,
-         max_new_tokens=128,
-         do_sample=True,
-         temperature=0.7,
-     )
-     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     reply = text.split("Assistant:")[-1].strip()
      return reply

- # 3) build Gradio ChatInterface *with open_routes enabled*
  demo = (
      gr.ChatInterface(
          fn=generate,
-         title="TinyLlama-1.1B Chat API",
-         description="Chat with TinyLlama-1.1B and call via /api/predict",
          type="messages",
      )
-     .queue(api_open=True)  # allow direct HTTP POST to /api/predict
  )

- # 4) launch
  if __name__ == "__main__":
-     demo.launch()

  import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel
  import gradio as gr
  import os

+ # Update this to your Hugging Face model ID
+ MODEL_ID = "YourUsername/TinyLlama-ECommerce-Chatbot"  # Replace with your actual model ID
+ BASE_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

+ def load_model():
+     """Load the fine-tuned model with its PEFT adapter."""
+     print("Loading tokenizer...")
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+
+     # Ensure a pad token is set
+     if tokenizer.pad_token is None:
+         tokenizer.pad_token = tokenizer.eos_token
+
+     print("Loading base model...")
+     base_model = AutoModelForCausalLM.from_pretrained(
+         BASE_MODEL_ID,
+         load_in_4bit=True,  # comment out to use full precision
+         torch_dtype=torch.float16,
+         device_map="auto",
+         trust_remote_code=True,
+     )
+
+     print("Loading PEFT adapter...")
+     model = PeftModel.from_pretrained(base_model, MODEL_ID)
+
+     print("Model loaded successfully!")
+     return model, tokenizer
+
+ # Load model and tokenizer once at startup
+ model, tokenizer = load_model()

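Note: recent transformers releases deprecate passing load_in_4bit=True directly to from_pretrained in favor of an explicit BitsAndBytesConfig. A minimal equivalent sketch, assuming the Space has the bitsandbytes package installed:

    # Hypothetical alternative to the bare load_in_4bit=True kwarg above:
    # an explicit quantization config (requires bitsandbytes).
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,  # match the fp16 dtype used above
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
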
  def generate(message, history):
      """
+     Generate a response with the fine-tuned e-commerce chatbot.
      message: Current user message (string)
+     history: list of {"role": ..., "content": ...} dicts (Gradio type="messages")
      returns: assistant's reply (string)
      """
+     DEFAULT_SYSTEM_PROMPT = (
+         "You are a helpful e-commerce customer service assistant. Provide accurate, "
+         "helpful, and friendly responses to customer inquiries about products, orders, "
+         "shipping, returns, and general shopping assistance."
+     )
+
+     # Build the conversation in the <|system|>/<|user|>/<|assistant|> format the
+     # model was fine-tuned on
+     conversation = f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n"
+
+     # Add history; with type="messages", Gradio passes role/content dicts,
+     # not [user_message, assistant_message] pairs
+     for turn in history:
+         conversation += f"<|{turn['role']}|>\n{turn['content']}\n"
+
+     # Add the current message and cue the assistant's turn
+     conversation += f"<|user|>\n{message}\n<|assistant|>\n"
+
+     # Tokenize (truncation drops tokens from the right by default; for very long
+     # histories, consider tokenizer.truncation_side = "left" to keep recent turns)
+     inputs = tokenizer(
+         conversation,
+         return_tensors="pt",
+         max_length=512,
+         truncation=True,
+         padding=True,
+     ).to(model.device)
+
+     # Generate response
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=150,
+             do_sample=True,
+             temperature=0.8,
+             top_p=0.9,
+             top_k=50,
+             repetition_penalty=1.1,
+             pad_token_id=tokenizer.pad_token_id,
+             eos_token_id=tokenizer.eos_token_id,
+         )
+
+     # Decode and pull out the assistant's reply
+     full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     # The prompt ends with "<|assistant|>\n", so the last chunk after that marker
+     # is the newly generated reply
+     if "<|assistant|>" in full_text:
+         reply = full_text.split("<|assistant|>")[-1].strip()
+         # Trim anything generated past the next user turn
+         if "<|user|>" in reply:
+             reply = reply.split("<|user|>")[0].strip()
+     else:
+         reply = "I apologize, but I couldn't generate a proper response. Please try again."
+
      return reply

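Note: the hand-built prompt string above reflects the format the adapter was reportedly trained on. If the uploaded tokenizer also kept the base model's chat template, tokenizer.apply_chat_template produces the same Zephyr-style layout (including the </s> turn terminators) with less room for drift; a sketch under that assumption:

    # Sketch: format the prompt via the tokenizer's chat template instead of
    # manual string building (assumes the tokenizer still carries a chat_template).
    msgs = [{"role": "system", "content": DEFAULT_SYSTEM_PROMPT}]
    msgs += history  # already role/content dicts when type="messages"
    msgs.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        msgs,
        add_generation_prompt=True,  # append the assistant tag so generation continues the reply
        return_tensors="pt",
    ).to(model.device)
    # then: outputs = model.generate(input_ids, max_new_tokens=150, ...)
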
+ # Build Gradio ChatInterface
  demo = (
      gr.ChatInterface(
          fn=generate,
+         title="E-commerce Customer Service Chatbot",
+         description="Chat with our AI-powered e-commerce assistant. Ask about products, orders, shipping, returns, and more!",
+         examples=[
+             "What's your return policy?",
+             "How long does shipping take?",
+             "Do you have any discounts available?",
+             "I need help with my order",
+             "What payment methods do you accept?",
+         ],
          type="messages",
      )
+     .queue(api_open=True)  # keep the HTTP API open for programmatic calls
  )

+ # Launch the app
  if __name__ == "__main__":
+     demo.launch(
+         server_name="0.0.0.0",  # Allow external access
+         server_port=7860,       # Default Gradio port
+         share=False,            # Set to True if you want a public link
+     )
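
With api_open=True the Space accepts programmatic calls as well. The exact HTTP routes have changed across Gradio major versions, so the version-agnostic way in is the official client; a sketch, where "YourUsername/your-space-name" is a placeholder for the real Space ID:

    # Query the deployed Space without the browser UI (pip install gradio_client).
    from gradio_client import Client

    client = Client("YourUsername/your-space-name")  # placeholder Space ID
    reply = client.predict(
        "What's your return policy?",  # message
        api_name="/chat",              # endpoint name gr.ChatInterface registers
    )
    print(reply)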