# app.py for the RezAI chat Space (Hugging Face Spaces, ZeroGPU)
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr
import spaces
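# `spaces` supplies the @spaces.GPU decorator, which requests a ZeroGPU slice
# for the duration of each decorated call on Hugging Face Spaces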
# Load the quantized base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained(
"unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
torch_dtype=torch.float16,
device_map="auto",
trust_remote_code=True
)
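# Assumes bitsandbytes is installed: this checkpoint is pre-quantized to 4-bit,
# and float16 is used as the compute dtype for the non-quantized layers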
tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Load LoRA adapter
model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")
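# Note: PeftModel.merge_and_unload() could fold the adapter into the base
# weights for faster inference, but merging into a 4-bit quantized base is
# lossy, so the adapter is kept separate here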
@spaces.GPU
def chat_with_rezAi(message, history):
    # Keyword filter to keep the conversation on professional topics
    blocked_words = [
        "gay", "lesbian", "trans", "nonbinary", "bisexual", "queer", "straight", "asexual",
        "gender", "sexuality", "pronouns", "orientation",
        "religious", "religion", "god", "atheist", "christian", "muslim", "jew", "buddhist",
        "hindu", "islam", "faith", "belief", "church", "pray", "prayer",
        "politics", "political", "liberal", "conservative", "democrat", "republican",
        "leftist", "right-wing", "marxist", "capitalist", "socialist", "communist", "election",
        "racist", "sexist", "homophobic", "transphobic", "bigot", "white supremacist",
        "nazi", "kkk", "fascist", "islamophobia", "antisemitic",
        "kill", "suicide", "die", "death", "harm", "cutting", "self-harm", "abuse",
        "murder", "assault", "shoot", "bomb",
        "sex", "porn", "nude", "boobs", "dick", "penis", "vagina", "masturbate", "orgasm",
        "fetish", "onlyfans", "strip", "erotic", "nsfw", "xxx",
        "weed", "cocaine", "heroin", "lsd", "meth", "shrooms", "alcohol", "drunk", "high",
        "depression", "anxiety", "bipolar", "schizophrenia", "autism", "adhd", "disorder",
        "therapy", "therapist", "mental", "diagnosis",
        "address", "location", "phone", "email", "age", "birthday", "social security", "ssn",
    ]
    # Match whole words only, so e.g. "transformers" is not blocked by "trans"
    lower_msg = message.lower()
    for phrase in blocked_words:
        if re.search(rf"\b{re.escape(phrase)}\b", lower_msg):
            return (
                "I'm not able to respond to that. Let's keep the conversation "
                "focused on Reza's professional and technical experience."
            )
    # Build the system prompt (double newline after the header token, per the
    # Llama 3.1 chat format; the tokenizer adds <|begin_of_text|> when encoding)
    prompt = (
        "<|start_header_id|>system<|end_header_id|>\n\n"
        "You are Reza Enayati, a confident, ambitious, and thoughtful Computer Science student and entrepreneur from Los Angeles, born in Iran. "
        "You are excited by opportunities to grow, solve meaningful problems, and contribute to impactful teams. "
        "You do not make assumptions or claims about Reza's identity, beliefs, health, or personal life unless they are explicitly stated in the prompt or training data. "
        "If uncertain, respond respectfully and acknowledge that you cannot speak for Reza on that topic. "
        "You answer respectfully, as if in an interview, always emphasizing enthusiasm, adaptability, and readiness. "
        "Avoid self-doubt. Highlight what you're ready to do, not what you're not. Stay positive, and when appropriate, ask a follow-up question.<|eot_id|>"
    )
    # Replay the full conversation history
    for user_msg, assistant_msg in history:
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
        prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"

    # Append the current user message and open the assistant turn
    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    # Tokenize and move to the model's device
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    # Generate a response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )
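    # Alternative: slice off the prompt tokens and decode only the new ones,
    # e.g. tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:],
    # skip_special_tokens=True), which would avoid the string splitting below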
    # Decode the full output, then extract only the new assistant turn
    full_response = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
    if "<|start_header_id|>assistant<|end_header_id|>" in full_response:
        assistant_response = full_response.split("<|start_header_id|>assistant<|end_header_id|>")[-1]
    else:
        assistant_response = full_response

    # Strip the end-of-turn token and any trailing special tokens
    assistant_response = assistant_response.replace("<|eot_id|>", "").strip()
    if "<|" in assistant_response:
        assistant_response = assistant_response.split("<|")[0].strip()

    return assistant_response
# Simple Gradio interface
demo = gr.ChatInterface(
    fn=chat_with_rezAi,
    title="Chat with RezAI",
    description="Ask me about Reza's background and experience!",
)
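# Note: the callback assumes the tuple-style history of (user, assistant) pairs;
# newer Gradio versions prefer type="messages" (role/content dicts), which would
# require adjusting the history loop in chat_with_rezAi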
if __name__ == "__main__":
    demo.launch()
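# Assumed dependencies for this Space: torch, transformers, peft, bitsandbytes,
# accelerate, and gradio; the `spaces` package comes with the ZeroGPU runtime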