Spaces:

rezaenayati
/

RezAi

Runtime error

App Files Files Community

RezAi / app.py

rezaenayati

Update app.py

7164a5b verified 3 months ago

raw

history blame

3.29 kB

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import PeftModel
	import gradio as gr
	import spaces # Important for ZeroGPU

	# Load models (will be moved to GPU when needed)
	base_model = AutoModelForCausalLM.from_pretrained(
	"unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
	torch_dtype=torch.float16,
	device_map="auto", # ZeroGPU handles this
	trust_remote_code=True
	)

	tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")

	# Add padding token if missing
	if tokenizer.pad_token is None:
	tokenizer.pad_token = tokenizer.eos_token

	# Load LoRA adapter
	model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")

	@spaces.GPU # This decorator is CRITICAL for ZeroGPU
	def chat_with_rezAi(messages, history):
	conversation = "<\|start_header_id\|>system<\|end_header_id\|>\nYou are Reza Enayati, a Computer Science student and entrepreneur from Los Angeles, who is eager to work as a software engineer or machine learning engineer. Answer these questions as if you are in an interview.<\|eot_id\|>"

	# Add conversation history
	for user_msg, assistant_msg in history:
	conversation += f"<\|start_header_id\|>user<\|end_header_id\|>\n{user_msg}<\|eot_id\|>"
	conversation += f"<\|start_header_id\|>assistant<\|end_header_id\|>\n{assistant_msg}<\|eot_id\|>"

	# Add current message
	conversation += f"<\|start_header_id\|>user<\|end_header_id\|>\n{messages}<\|eot_id\|><\|start_header_id\|>assistant<\|end_header_id\|>\n"

	# Tokenize - fix the max_length parameter
	inputs = tokenizer(
	conversation,
	return_tensors="pt",
	truncation=True, # Changed from 'truncate=True'
	max_length=2048
	)

	# Move inputs to the same device as model
	inputs = {k: v.to(model.device) for k, v in inputs.items()}

	# Generate response
	with torch.no_grad():
	outputs = model.generate(
	**inputs,
	max_new_tokens=128,
	temperature=0.7, # Slightly increased for more variety
	do_sample=True,
	pad_token_id=tokenizer.eos_token_id,
	eos_token_id=tokenizer.eos_token_id,
	repetition_penalty=1.1 # Added to reduce repetition
	)

	# Decode response
	response = tokenizer.decode(outputs[0], skip_special_tokens=True)
	new_response = response.split("<\|start_header_id\|>assistant<\|end_header_id\|>")[-1].strip()

	# Clean up response - remove any incomplete tags
	if "<\|" in new_response:
	new_response = new_response.split("<\|")[0].strip()

	return new_response

	# Create Gradio interface
	demo = gr.ChatInterface(
	fn=chat_with_rezAi,
	title="💬 Chat with RezAI",
	description="Hi! I'm RezAI, Reza's AI twin. Ask me about his technical background, projects, or experience!",
	examples=[
	"Tell me about your background",
	"What programming languages do you know?",
	"Walk me through RezAI",
	"What's your experience with machine learning?",
	"How did you get into computer science?"
	],
	retry_btn=None,
	undo_btn="Delete Previous",
	clear_btn="Clear Chat",
	theme=gr.themes.Soft(), # Added a nice theme
	)

	if __name__ == "__main__":
	demo.launch()