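"""TinyRP Chat: a small Gradio app for roleplay chat with character presets.

Runs DarwinAnim8or/TinyRP on CPU via transformers. Assumed (unpinned)
dependencies: gradio, transformers, torch, plus accelerate for the
device_map argument used when loading the model.
"""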
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"
# Global variables for model
tokenizer = None
model = None
def load_model():
    """Load model and tokenizer"""
    global tokenizer, model
    try:
        print("Loading model for CPU inference...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # full precision is the safe default on CPU
            device_map="cpu",           # requires the accelerate package
            trust_remote_code=True
        )
        print(f"✅ Model loaded successfully: {MODEL_NAME}")
        return True
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        return False

# Sample character presets
CHARACTERS = {
    "Custom Character": "",
    "Adventurous Knight": "You are Sir Gareth, a brave and noble knight on a quest to save the kingdom. You speak with honor and courage, always ready to help those in need.",
    "Mysterious Wizard": "You are Eldara, an ancient and wise wizard who speaks in riddles and knows secrets of the mystical arts. You are helpful but often cryptic.",
    "Friendly Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves telling stories and meeting new travelers. Your tavern is a warm, welcoming place.",
    "Curious Scientist": "You are Dr. Maya Chen, a brilliant scientist fascinated by discovery. You explain complex concepts simply and love new experiments.",
    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled to distant galaxies. You're brave, curious, and ready for adventure."
}
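# chat_respond() below assembles the prompt in ChatML form (which this app
# assumes the model was trained to follow):
#
#   <|im_start|>system
#   {character description}<|im_end|>
#   <|im_start|>user
#   {message}<|im_end|>
#   <|im_start|>assistant
#
# The assistant reply is then recovered by splitting the decoded output on
# the last "<|im_start|>assistant" marker.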
def chat_respond(message, history, character_desc, max_tokens, temperature, top_p, rep_penalty):
    """Main chat response function"""
    if not message.strip():
        return history

    if model is None:
        response = "❌ Model not loaded. Please check the model path."
        history.append([message, response])
        return history

    try:
        # Build ChatML conversation
        conversation = ""

        # Add character as system message
        if character_desc.strip():
            conversation += f"<|im_start|>system\n{character_desc}<|im_end|>\n"

        # Add history
        for user_msg, bot_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"

        # Add current message
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

        # Tokenize, truncating long histories so prompt + new tokens fit the context window
        inputs = tokenizer.encode(conversation, return_tensors="pt", max_length=900, truncation=True)

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=rep_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode response, keeping special tokens so the ChatML markers survive
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Extract assistant response
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            response = response.replace("<|im_end|>", "").strip()
        else:
            response = "Sorry, couldn't generate a response."

        # Clean up any leftover ChatML markers
        response = response.replace("<|im_start|>", "").replace("<|im_end|>", "")
        response = response.strip()

        if not response:
            response = "No response generated."
    except Exception as e:
        response = f"Error: {str(e)}"

    # Add to history
    history.append([message, response])
    return history

def load_character(character_name):
    """Load character preset"""
    return CHARACTERS.get(character_name, "")

def clear_chat():
    """Clear chat history"""
    return []

# Load model on startup
model_loaded = load_model()
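# `model_loaded` doubles as a startup health check: if loading failed,
# `model` stays None and chat_respond() reports the problem per message.
if not model_loaded:
    print("WARNING: model failed to load; chat responses will show an error.")
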
# Create interface
with gr.Blocks(title="TinyRP Chat") as demo:
    gr.Markdown("# 🎭 TinyRP Character Chat")
    gr.Markdown("Chat with AI characters using local CPU inference!")
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500, label="Conversation")
            msg_box = gr.Textbox(label="Message", placeholder="Type here...")
        with gr.Column(scale=1):
            gr.Markdown("### Character")
            char_dropdown = gr.Dropdown(
                choices=list(CHARACTERS.keys()),
                value="Custom Character",
                label="Preset"
            )
            char_text = gr.Textbox(
                label="Description",
                lines=4,
                placeholder="Character description..."
            )
            load_btn = gr.Button("Load Character")

            gr.Markdown("### Settings")
            max_tokens = gr.Slider(16, 256, 80, label="Max tokens")
            temperature = gr.Slider(0.1, 2.0, 0.9, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, 0.85, label="Top-p")
            rep_penalty = gr.Slider(1.0, 1.5, 1.1, label="Rep penalty")
            clear_btn = gr.Button("Clear Chat")

    # Character samples
    gr.Markdown("### Sample Characters")
    with gr.Row():
        for name in ["Adventurous Knight", "Mysterious Wizard", "Space Explorer"]:
            gr.Markdown(f"**{name}**: {CHARACTERS[name][:80]}...")
    # Event handlers: generate a response, then clear the message box
    msg_box.submit(
        fn=chat_respond,
        inputs=[msg_box, chatbot, char_text, max_tokens, temperature, top_p, rep_penalty],
        outputs=[chatbot]
    ).then(
        fn=lambda: "",
        outputs=[msg_box]
    )

    load_btn.click(
        fn=load_character,
        inputs=[char_dropdown],
        outputs=[char_text]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot]
    )
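# demo.launch() with no arguments binds to 127.0.0.1:7860 by default; on
# Hugging Face Spaces the host and port are supplied via the
# GRADIO_SERVER_NAME / GRADIO_SERVER_PORT environment variables.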
if __name__ == "__main__":
    demo.launch()