import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
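# Assumed dependencies: gradio, transformers, torch; passing device_map to
# from_pretrained below additionally requires the accelerate package.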

# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"

# Global variables for model
tokenizer = None
model = None

def load_model():
    """Load model and tokenizer"""
    global tokenizer, model
    try:
        print("Loading model for CPU inference...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
            trust_remote_code=True
        )
        print(f"βœ… Model loaded successfully: {MODEL_NAME}")
        return True
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        return False

# Sample character presets
CHARACTERS = {
    "Custom Character": "",
    "Adventurous Knight": "You are Sir Gareth, a brave and noble knight on a quest to save the kingdom. You speak with honor and courage, always ready to help those in need.",
    "Mysterious Wizard": "You are Eldara, an ancient and wise wizard who speaks in riddles and knows secrets of the mystical arts. You are helpful but often cryptic.",
    "Friendly Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves telling stories and meeting new travelers. Your tavern is a warm, welcoming place.",
    "Curious Scientist": "You are Dr. Maya Chen, a brilliant scientist fascinated by discovery. You explain complex concepts simply and love new experiments.",
    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled to distant galaxies. You're brave, curious, and ready for adventure."
}

def chat_respond(message, history, character_desc, max_tokens, temperature, top_p, rep_penalty):
    """Main chat response function"""
    
    if not message.strip():
        return history
    
    if model is None or tokenizer is None:
        response = "❌ Model not loaded. Please check the model name and startup logs."
        history.append([message, response])
        return history
    
    try:
        # Build ChatML conversation
        conversation = ""
        
        # Add character as system message
        if character_desc.strip():
            conversation += f"<|im_start|>system\n{character_desc}<|im_end|>\n"
        
        # Add history
        for user_msg, bot_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
        
        # Add current message
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
        
        # Tokenize; truncate from the left so the most recent turns are kept
        # (the default right-side truncation would drop the newest messages)
        tokenizer.truncation_side = "left"
        inputs = tokenizer(conversation, return_tensors="pt", max_length=900, truncation=True)
        
        # Generate (attention_mask passed explicitly to avoid a transformers warning)
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=rep_penalty,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
        
        # Decode response
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
        
        # Extract assistant response
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            response = response.replace("<|im_end|>", "").strip()
        else:
            response = "Sorry, couldn't generate a response."
        
        # Clean up response
        response = response.replace("<|im_start|>", "").replace("<|im_end|>", "")
        response = response.strip()
        
        if not response:
            response = "No response generated."
            
    except Exception as e:
        response = f"Error: {str(e)}"
    
    # Add to history
    history.append([message, response])
    return history

def load_character(character_name):
    """Load character preset"""
    return CHARACTERS.get(character_name, "")

def clear_chat():
    """Clear chat history"""
    return []

# Load model on startup; if loading fails, the UI still launches and
# chat_respond reports the error inside the chat window
model_loaded = load_model()

# Create interface
with gr.Blocks(title="TinyRP Chat") as demo:
    
    gr.Markdown("# 🎭 TinyRP Character Chat")
    gr.Markdown("Chat with AI characters using local CPU inference!")
    
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500, label="Conversation")
            msg_box = gr.Textbox(label="Message", placeholder="Type here...")
            
        with gr.Column(scale=1):
            gr.Markdown("### Character")
            char_dropdown = gr.Dropdown(
                choices=list(CHARACTERS.keys()), 
                value="Custom Character",
                label="Preset"
            )
            char_text = gr.Textbox(
                label="Description", 
                lines=4,
                placeholder="Character description..."
            )
            load_btn = gr.Button("Load Character")
            
            gr.Markdown("### Settings")
            max_tokens = gr.Slider(16, 256, 80, label="Max tokens")
            temperature = gr.Slider(0.1, 2.0, 0.9, label="Temperature")  
            top_p = gr.Slider(0.1, 1.0, 0.85, label="Top-p")
            rep_penalty = gr.Slider(1.0, 1.5, 1.1, label="Rep penalty")
            
            clear_btn = gr.Button("Clear Chat")
    
    # Character samples
    gr.Markdown("### Sample Characters")
    with gr.Row():
        for name in ["Adventurous Knight", "Mysterious Wizard", "Space Explorer"]:
            gr.Markdown(f"**{name}**: {CHARACTERS[name][:80]}...")
    
    # Event handlers: submitting the textbox sends the message, then clears it
    msg_box.submit(
        fn=chat_respond,
        inputs=[msg_box, chatbot, char_text, max_tokens, temperature, top_p, rep_penalty],
        outputs=[chatbot]
    ).then(
        fn=lambda: "",
        outputs=[msg_box]
    )
    
    load_btn.click(
        fn=load_character,
        inputs=[char_dropdown],
        outputs=[char_text]
    )
    
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot]
    )

if __name__ == "__main__":
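    # Serves on http://127.0.0.1:7860 by default; demo.launch(share=True)
    # would also create a temporary public URL if needed.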
    demo.launch()