Spaces:

rezaenayati
/

RezAi

Runtime error

App Files Files Community

RezAi / app.py

rezaenayati

Update app.py

da587af verified 3 months ago

raw

history blame

2.79 kB

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import PeftModel
	import gradio as gr
	import spaces # Important for ZeroGPU

	# Load models (will be moved to GPU when needed)
	base_model = AutoModelForCausalLM.from_pretrained(
	"unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
	torch_dtype=torch.float16,
	device_map="auto", # ZeroGPU handles this
	trust_remote_code=True
	)

	tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")

	# Add padding token if missing
	if tokenizer.pad_token is None:
	tokenizer.pad_token = tokenizer.eos_token

	# Load LoRA adapter
	model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")

	@spaces.GPU # This decorator is CRITICAL for ZeroGPU
	def chat_with_rezAi(messages, history):
	conversation = "<\|start_header_id\|>system<\|end_header_id\|>\nYou are Reza Enayati, a Computer Science student and entrepreneur from Los Angeles, who is eager to work as a software engineer or machine learning engineer. Answer these questions as if you are in an interview.<\|eot_id\|>"

	# Add conversation history
	for user_msg, assistant_msg in history:
	conversation += f"<\|start_header_id\|>user<\|end_header_id\|>\n{user_msg}<\|eot_id\|>"
	conversation += f"<\|start_header_id\|>assistant<\|end_header_id\|>\n{assistant_msg}<\|eot_id\|>"

	# Add current message
	conversation += f"<\|start_header_id\|>user<\|end_header_id\|>\n{messages}<\|eot_id\|><\|start_header_id\|>assistant<\|end_header_id\|>\n"

	# Tokenize
	inputs = tokenizer(conversation, return_tensors="pt", truncate=True, max_length=2048)

	# Generate response
	with torch.no_grad():
	outputs = model.generate(
	**inputs,
	max_new_tokens=128,
	temperature=0.5,
	do_sample=True,
	pad_token_id=tokenizer.eos_token_id,
	eos_token_id=tokenizer.eos_token_id
	)

	# Decode response
	response = tokenizer.decode(outputs[0], skip_special_tokens=True)
	new_response = response.split("<\|start_header_id\|>assistant<\|end_header_id\|>")[-1].strip()

	return new_response

	# Create Gradio interface
	demo = gr.ChatInterface(
	fn=chat_with_rezAi,
	title="💬 Chat with RezAI",
	description="Hi! I'm RezAI, Reza's AI twin. Ask me about his technical background, projects, or experience!",
	examples=[
	"Tell me about your background",
	"What programming languages do you know?",
	"Walk me through your Pizza Guys project",
	"What's your experience with machine learning?",
	"How did you get into computer science?"
	],
	retry_btn=None,
	undo_btn="Delete Previous",
	clear_btn="Clear Chat",
	)

	if __name__ == "__main__":
	demo.launch()