import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr
import spaces
import re

# Load the 4-bit quantized base model
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")


@spaces.GPU
def chat_with_rezAi(message, history):
    # Off-limits topics, grouped loosely by category; matched as whole words below
    blocked_words = [
        # identity and orientation
        "gay", "lesbian", "trans", "nonbinary", "bisexual", "queer", "straight",
        "asexual", "gender", "sexuality", "pronouns", "orientation",
        # religion
        "religious", "religion", "god", "atheist", "christian", "muslim", "jew",
        "buddhist", "hindu", "islam", "faith", "belief", "church", "pray", "prayer",
        # politics
        "politics", "political", "liberal", "conservative", "democrat", "republican",
        "leftist", "right-wing", "marxist", "capitalist", "socialist", "communist",
        "election",
        # hate and extremism
        "racist", "sexist", "homophobic", "transphobic", "bigot", "white supremacist",
        "nazi", "kkk", "fascist", "islamophobia", "antisemitic",
        # violence and self-harm
        "kill", "suicide", "die", "death", "harm", "cutting", "self-harm", "abuse",
        "murder", "assault", "shoot", "bomb",
        # sexual content
        "sex", "porn", "nude", "boobs", "dick", "penis", "vagina", "masturbate",
        "orgasm", "fetish", "onlyfans", "strip", "erotic", "nsfw", "xxx",
        # drugs and alcohol
        "weed", "cocaine", "heroin", "lsd", "meth", "shrooms", "alcohol", "drunk", "high",
        # health
        "depression", "anxiety", "bipolar", "schizophrenia", "autism", "adhd",
        "disorder", "therapy", "therapist", "mental", "diagnosis",
        # personal information
        "address", "location", "phone", "age", "birthday", "social security", "ssn",
        # profanity and slurs
        "fuck", "bitch", "faggot", "fag",
    ]

    # Lowercase the user input so the whole-word match is case-insensitive
    lower_msg = message.lower()
    for phrase in blocked_words:
        if re.search(rf"\b{re.escape(phrase)}\b", lower_msg):
            return (
                "I'm not able to respond to that. Let's keep the conversation "
                "focused on Reza's professional and technical experience."
            )

    # System prompt, opening the conversation in Llama 3.1 chat format
    prompt = (
        "<|start_header_id|>system<|end_header_id|>\n\n"
        "You are Reza Enayati, a confident, ambitious, and thoughtful Computer Science student from Los Angeles, born in Iran. "
        "You are excited by opportunities to grow, solve meaningful problems, and contribute to impactful teams. "
        "You do not make assumptions or claims about Reza’s identity, beliefs, health, or personal life unless explicitly stated in the prompt or training data. "
        "If uncertain, respond respectfully and acknowledge that you cannot speak for Reza on that topic. "
        "You answer respectfully, as if in an interview, always emphasizing enthusiasm, adaptability, and readiness. "
        "Avoid self-doubt. Highlight what you're ready to do, not what you're not. "
        "Stay positive, and when appropriate, ask a follow-up question.<|eot_id|>"
    )
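    # Turn format assumed throughout: the official Llama 3.1 chat template,
    #   <|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>
    # If the adapter was fine-tuned through tokenizer.apply_chat_template, building
    # the prompt with that helper from a list of {"role": ..., "content": ...} dicts
    # would be a less fragile alternative to manual string concatenation.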
    # Replay the full conversation history as alternating user/assistant turns
    for user_msg, assistant_msg in history:
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
        prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"

    # Append the current user message and open the assistant turn
    prompt += (
        f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    # Tokenize and move to the model's device
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Generate a short, low-temperature completion
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )

    # Decode the full sequence with special tokens kept, so the headers can be split on
    full_response = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]

    # Keep only the text after the last assistant header
    if "<|start_header_id|>assistant<|end_header_id|>" in full_response:
        assistant_response = full_response.split("<|start_header_id|>assistant<|end_header_id|>")[-1]
    else:
        assistant_response = full_response

    # Drop the end-of-turn marker and anything after a stray special token
    assistant_response = assistant_response.replace("<|eot_id|>", "").strip()
    if "<|" in assistant_response:
        assistant_response = assistant_response.split("<|")[0].strip()

    return assistant_response


custom_css = """
body {
    background: linear-gradient(135deg, #0f0f0f, #1a1a1a);
    color: var(--text-primary, #ffffff);
    font-family: 'Inter', sans-serif;
}

.gradio-container {
    background: rgba(15, 15, 15, 0.8);
    border: 1px solid #1f1f1f;
    border-radius: 1.25rem;
    backdrop-filter: blur(12px);
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3);
    padding: 2rem;
}

.message.user {
    background-color: #374151 !important;
    color: #ffffff !important;
    border-radius: 1rem !important;
}

.message.bot {
    background-color: #1f1f1f !important;
    color: #d1d5db !important;
    border-radius: 1rem !important;
}

textarea, input[type="text"] {
    background: #1f1f1f;
    color: #ffffff;
    border: 1px solid #333;
    border-radius: 0.75rem;
}

button {
    background: linear-gradient(135deg, #4B5563 0%, #374151 100%);
    color: white;
    border-radius: 0.75rem;
    font-weight: 500;
    transition: background 0.3s;
}

button:hover {
    background: linear-gradient(135deg, #6B7280 0%, #4B5563 100%);
}
"""

# Simple Gradio chat interface
demo = gr.ChatInterface(
    fn=chat_with_rezAi,
    css=custom_css,
    title="RezAI",
    theme="monochrome",
)

if __name__ == "__main__":
    demo.launch()
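# Quick smoke test without the UI (hypothetical prompt; empty history in the
# (user, assistant) pair format the history loop above expects):
#   print(chat_with_rezAi("Tell me about your projects.", []))
# Outside Hugging Face Spaces, demo.launch(share=True) exposes a temporary public URL.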