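# RezAi / app.py
# Source: Hugging Face Space "RezAi" (owner: rezaenayati), commit da587af
# ("Update app.py", verified), 2.79 kB. The lines above the imports were
# file-viewer page chrome ("raw", "history blame") and are not part of the
# program.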
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr
import spaces # Important for ZeroGPU
# Hub repository used for both the base checkpoint and its tokenizer.
BASE_MODEL_ID = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
# Hub repository holding the LoRA adapter that is layered on the base model.
ADAPTER_ID = "rezaenayati/RezAi-Model"

# The tokenizer is independent of the model weights, so it is loaded first.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Base causal LM, loaded once at import time; device placement is delegated
# to `device_map="auto"`.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Wrap the base model with the fine-tuned adapter; `model` is what the chat
# handler generates with.
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
# System turn that opens every prompt (hand-written chat-template markup).
_SYSTEM_PROMPT = (
    "<|start_header_id|>system<|end_header_id|>\n"
    "You are Reza Enayati, a Computer Science student and entrepreneur from "
    "Los Angeles, who is eager to work as a software engineer or machine "
    "learning engineer. Answer these questions as if you are in an interview."
    "<|eot_id|>"
)
# Upper bound on the number of prompt tokens handed to the model.
_MAX_PROMPT_TOKENS = 2048


def _build_prompt(message, turns):
    """Return the full prompt string for `message` preceded by `turns`."""
    parts = [_SYSTEM_PROMPT]
    for user_msg, assistant_msg in turns:
        parts.append(
            f"<|start_header_id|>user<|end_header_id|>\n{user_msg}<|eot_id|>"
        )
        parts.append(
            f"<|start_header_id|>assistant<|end_header_id|>\n{assistant_msg}<|eot_id|>"
        )
    # The prompt ends with an open assistant header so the model writes the reply.
    parts.append(
        f"<|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )
    return "".join(parts)


@spaces.GPU  # Required so ZeroGPU attaches a GPU for the duration of the call
def chat_with_rezAi(messages, history):
    """Generate the assistant's reply to the current chat message.

    Args:
        messages: The user's current message (a single string, despite the
            plural name, which is kept for interface compatibility).
        history: Previous turns as an iterable of ``(user_msg, assistant_msg)``
            pairs; ``None`` or empty means no prior turns.

    Returns:
        The newly generated reply text, stripped of surrounding whitespace.
    """
    turns = list(history or [])
    prompt = _build_prompt(messages, turns)

    # Default truncation cuts the END of the sequence, which would remove the
    # current question and the open assistant header. Drop the oldest turns
    # first so the system prompt and the latest message always survive.
    while turns and len(tokenizer(prompt)["input_ids"]) > _MAX_PROMPT_TOKENS:
        del turns[0]
        prompt = _build_prompt(messages, turns)

    # `truncation=True` is the actual tokenizer argument (the previous
    # `truncate=True` was not); it remains as a last resort for a single
    # over-long message. The tensors are moved to the model's device so
    # `generate` does not mix devices.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_PROMPT_TOKENS,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.5,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Slice them off and
    # decode only the new tokens; splitting the decoded text on the header
    # markers cannot work because `skip_special_tokens=True` removes them.
    prompt_length = inputs["input_ids"].shape[-1]
    reply_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()
# Starter questions offered as clickable examples in the chat UI.
EXAMPLE_QUESTIONS = [
    "Tell me about your background",
    "What programming languages do you know?",
    "Walk me through your Pizza Guys project",
    "What's your experience with machine learning?",
    "How did you get into computer science?",
]

# Blurb displayed with the chat title.
CHAT_DESCRIPTION = (
    "Hi! I'm RezAI, Reza's AI twin. "
    "Ask me about his technical background, projects, or experience!"
)

# Chat front end: every submitted message is routed to `chat_with_rezAi`.
demo = gr.ChatInterface(
    fn=chat_with_rezAi,
    title="💬 Chat with RezAI",
    description=CHAT_DESCRIPTION,
    examples=EXAMPLE_QUESTIONS,
    retry_btn=None,  # no retry button
    undo_btn="Delete Previous",
    clear_btn="Clear Chat",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()