Spaces:
Running
Running
add debug print
Browse files
app.py
CHANGED
@@ -288,12 +288,18 @@ class AbliterationProcessor:
|
|
288 |
modified_weight = down_proj_weight - scale_factor * torch.matmul(projection_matrix, down_proj_weight)
|
289 |
layer.mlp.down_proj.weight.data = modified_weight
|
290 |
|
291 |
-
def chat(self, message, history, max_new_tokens=2048):
|
292 |
"""Chat functionality"""
|
|
|
|
|
293 |
if self.model is None or self.tokenizer is None:
|
|
|
294 |
return "⚠️ Please load a model first!", history
|
295 |
|
296 |
try:
|
|
|
|
|
|
|
297 |
# Build conversation history
|
298 |
conversation = []
|
299 |
for msg in history:
|
@@ -308,13 +314,16 @@ class AbliterationProcessor:
|
|
308 |
|
309 |
# Add current message
|
310 |
conversation.append({"role": "user", "content": message})
|
|
|
311 |
|
312 |
# Generate tokens
|
|
|
313 |
toks = self.tokenizer.apply_chat_template(
|
314 |
conversation=conversation,
|
315 |
add_generation_prompt=True,
|
316 |
return_tensors="pt"
|
317 |
)
|
|
|
318 |
|
319 |
# Generate response with streaming like abliterated_optimized.py
|
320 |
from transformers import TextStreamer
|
@@ -328,25 +337,36 @@ class AbliterationProcessor:
|
|
328 |
self.captured = []
|
329 |
|
330 |
def on_finalized_text(self, text: str, stream_end: bool = False):
|
|
|
331 |
self.captured.append(text)
|
332 |
super().on_finalized_text(text, stream_end)
|
333 |
|
334 |
streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
335 |
|
|
|
336 |
gen = self.model.generate(
|
337 |
toks.to(self.model.device),
|
338 |
max_new_tokens=max_new_tokens,
|
339 |
-
temperature=
|
340 |
do_sample=True,
|
341 |
pad_token_id=self.tokenizer.eos_token_id,
|
342 |
streamer=streamer
|
343 |
)
|
344 |
|
|
|
|
|
|
|
345 |
# Get the complete response from streamer
|
346 |
response = "".join(streamer.captured).strip()
|
|
|
|
|
|
|
347 |
return response, history + [[message, response]]
|
348 |
|
349 |
except Exception as e:
|
|
|
|
|
|
|
350 |
return f"❌ Chat error: {str(e)}", history
|
351 |
|
352 |
def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
|
@@ -472,16 +492,6 @@ def create_interface():
|
|
472 |
org_token.render()
|
473 |
private_repo.render()
|
474 |
|
475 |
-
gr.Markdown("### 💬 Chat Settings")
|
476 |
-
max_new_tokens = gr.Number(
|
477 |
-
value=2048,
|
478 |
-
label="Max New Tokens",
|
479 |
-
minimum=1,
|
480 |
-
maximum=8192,
|
481 |
-
step=1,
|
482 |
-
info="Maximum number of tokens to generate in chat responses"
|
483 |
-
)
|
484 |
-
|
485 |
process_btn = gr.Button("🚀 Start Processing", variant="primary")
|
486 |
process_output = gr.Markdown(label="Processing Result")
|
487 |
process_image = gr.Image(show_label=False)
|
@@ -506,25 +516,48 @@ def create_interface():
|
|
506 |
|
507 |
# Chat tab
|
508 |
with gr.TabItem("💬 Chat Test"):
|
509 |
-
chatbot = gr.Chatbot(
|
510 |
-
label="Chat Window",
|
511 |
-
height=400,
|
512 |
-
type="messages"
|
513 |
-
)
|
514 |
-
msg = gr.Textbox(
|
515 |
-
label="Input Message",
|
516 |
-
placeholder="Enter your question...",
|
517 |
-
lines=3
|
518 |
-
)
|
519 |
with gr.Row():
|
520 |
-
|
521 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
522 |
|
523 |
gr.Markdown("""
|
524 |
**Usage Tips:**
|
525 |
- Load a model first, then you can start chatting
|
526 |
- The processed model will have reduced refusal behavior
|
527 |
- You can test various sensitive questions
|
|
|
|
|
528 |
""")
|
529 |
|
530 |
# Bind events
|
@@ -548,18 +581,18 @@ def create_interface():
|
|
548 |
def user(user_message, history):
|
549 |
return "", history + [{"role": "user", "content": user_message}]
|
550 |
|
551 |
-
def bot(history, max_new_tokens):
|
552 |
if history and history[-1]["role"] == "user":
|
553 |
-
response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens)
|
554 |
history.append({"role": "assistant", "content": response})
|
555 |
return history
|
556 |
|
557 |
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
558 |
-
bot, [chatbot, max_new_tokens], chatbot
|
559 |
)
|
560 |
|
561 |
send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
562 |
-
bot, [chatbot, max_new_tokens], chatbot
|
563 |
)
|
564 |
|
565 |
clear.click(lambda: [], None, chatbot, queue=False)
|
|
|
288 |
modified_weight = down_proj_weight - scale_factor * torch.matmul(projection_matrix, down_proj_weight)
|
289 |
layer.mlp.down_proj.weight.data = modified_weight
|
290 |
|
291 |
+
def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
|
292 |
"""Chat functionality"""
|
293 |
+
print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")
|
294 |
+
|
295 |
if self.model is None or self.tokenizer is None:
|
296 |
+
print("DEBUG: Model or tokenizer not loaded")
|
297 |
return "⚠️ Please load a model first!", history
|
298 |
|
299 |
try:
|
300 |
+
print(f"DEBUG: Processing message: {message[:100]}...")
|
301 |
+
print(f"DEBUG: History length: {len(history)}")
|
302 |
+
|
303 |
# Build conversation history
|
304 |
conversation = []
|
305 |
for msg in history:
|
|
|
314 |
|
315 |
# Add current message
|
316 |
conversation.append({"role": "user", "content": message})
|
317 |
+
print(f"DEBUG: Conversation length: {len(conversation)}")
|
318 |
|
319 |
# Generate tokens
|
320 |
+
print("DEBUG: Generating tokens...")
|
321 |
toks = self.tokenizer.apply_chat_template(
|
322 |
conversation=conversation,
|
323 |
add_generation_prompt=True,
|
324 |
return_tensors="pt"
|
325 |
)
|
326 |
+
print(f"DEBUG: Input tokens shape: {toks.shape}")
|
327 |
|
328 |
# Generate response with streaming like abliterated_optimized.py
|
329 |
from transformers import TextStreamer
|
|
|
337 |
self.captured = []
|
338 |
|
339 |
def on_finalized_text(self, text: str, stream_end: bool = False):
|
340 |
+
print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
|
341 |
self.captured.append(text)
|
342 |
super().on_finalized_text(text, stream_end)
|
343 |
|
344 |
streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
|
345 |
|
346 |
+
print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")
|
347 |
gen = self.model.generate(
|
348 |
toks.to(self.model.device),
|
349 |
max_new_tokens=max_new_tokens,
|
350 |
+
temperature=temperature,
|
351 |
do_sample=True,
|
352 |
pad_token_id=self.tokenizer.eos_token_id,
|
353 |
streamer=streamer
|
354 |
)
|
355 |
|
356 |
+
print(f"DEBUG: Generation completed, output shape: {gen.shape}")
|
357 |
+
print(f"DEBUG: Streamer captured {len(streamer.captured)} text chunks")
|
358 |
+
|
359 |
# Get the complete response from streamer
|
360 |
response = "".join(streamer.captured).strip()
|
361 |
+
print(f"DEBUG: Final response length: {len(response)}")
|
362 |
+
print(f"DEBUG: Response preview: {response[:200]}...")
|
363 |
+
|
364 |
return response, history + [[message, response]]
|
365 |
|
366 |
except Exception as e:
|
367 |
+
print(f"DEBUG: Exception occurred: {str(e)}")
|
368 |
+
import traceback
|
369 |
+
traceback.print_exc()
|
370 |
return f"❌ Chat error: {str(e)}", history
|
371 |
|
372 |
def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
|
|
|
492 |
org_token.render()
|
493 |
private_repo.render()
|
494 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
process_btn = gr.Button("🚀 Start Processing", variant="primary")
|
496 |
process_output = gr.Markdown(label="Processing Result")
|
497 |
process_image = gr.Image(show_label=False)
|
|
|
516 |
|
517 |
# Chat tab
|
518 |
with gr.TabItem("💬 Chat Test"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
519 |
with gr.Row():
|
520 |
+
with gr.Column(scale=3):
|
521 |
+
chatbot = gr.Chatbot(
|
522 |
+
label="Chat Window",
|
523 |
+
height=400,
|
524 |
+
type="messages"
|
525 |
+
)
|
526 |
+
msg = gr.Textbox(
|
527 |
+
label="Input Message",
|
528 |
+
placeholder="Enter your question...",
|
529 |
+
lines=3
|
530 |
+
)
|
531 |
+
with gr.Row():
|
532 |
+
send_btn = gr.Button("📤 Send", variant="primary")
|
533 |
+
clear = gr.Button("🗑️ Clear Chat")
|
534 |
+
|
535 |
+
with gr.Column(scale=1):
|
536 |
+
gr.Markdown("### ⚙️ Chat Settings")
|
537 |
+
max_new_tokens = gr.Number(
|
538 |
+
value=2048,
|
539 |
+
label="Max New Tokens",
|
540 |
+
minimum=1,
|
541 |
+
maximum=8192,
|
542 |
+
step=1,
|
543 |
+
info="Maximum number of tokens to generate"
|
544 |
+
)
|
545 |
+
temperature = gr.Slider(
|
546 |
+
minimum=0.1,
|
547 |
+
maximum=2.0,
|
548 |
+
value=0.7,
|
549 |
+
step=0.1,
|
550 |
+
label="Temperature",
|
551 |
+
info="Higher values = more creative, Lower values = more focused"
|
552 |
+
)
|
553 |
|
554 |
gr.Markdown("""
|
555 |
**Usage Tips:**
|
556 |
- Load a model first, then you can start chatting
|
557 |
- The processed model will have reduced refusal behavior
|
558 |
- You can test various sensitive questions
|
559 |
+
- Adjust Max New Tokens to control response length
|
560 |
+
- Adjust Temperature to control creativity
|
561 |
""")
|
562 |
|
563 |
# Bind events
|
|
|
581 |
def user(user_message, history):
|
582 |
return "", history + [{"role": "user", "content": user_message}]
|
583 |
|
584 |
+
def bot(history, max_new_tokens, temperature):
|
585 |
if history and history[-1]["role"] == "user":
|
586 |
+
response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
|
587 |
history.append({"role": "assistant", "content": response})
|
588 |
return history
|
589 |
|
590 |
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
591 |
+
bot, [chatbot, max_new_tokens, temperature], chatbot
|
592 |
)
|
593 |
|
594 |
send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
595 |
+
bot, [chatbot, max_new_tokens, temperature], chatbot
|
596 |
)
|
597 |
|
598 |
clear.click(lambda: [], None, chatbot, queue=False)
|