Brianpuz committed
Commit 3d9243c · 1 Parent(s): 2408556

add debug print

Files changed (1):
  1. app.py +61 -28
app.py CHANGED
@@ -288,12 +288,18 @@ class AbliterationProcessor:
             modified_weight = down_proj_weight - scale_factor * torch.matmul(projection_matrix, down_proj_weight)
             layer.mlp.down_proj.weight.data = modified_weight
 
-    def chat(self, message, history, max_new_tokens=2048):
+    def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
         """Chat functionality"""
+        print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")
+
         if self.model is None or self.tokenizer is None:
+            print("DEBUG: Model or tokenizer not loaded")
             return "⚠️ Please load a model first!", history
 
         try:
+            print(f"DEBUG: Processing message: {message[:100]}...")
+            print(f"DEBUG: History length: {len(history)}")
+
             # Build conversation history
             conversation = []
             for msg in history:
@@ -308,13 +314,16 @@ class AbliterationProcessor:
 
             # Add current message
             conversation.append({"role": "user", "content": message})
+            print(f"DEBUG: Conversation length: {len(conversation)}")
 
             # Generate tokens
+            print("DEBUG: Generating tokens...")
             toks = self.tokenizer.apply_chat_template(
                 conversation=conversation,
                 add_generation_prompt=True,
                 return_tensors="pt"
             )
+            print(f"DEBUG: Input tokens shape: {toks.shape}")
 
             # Generate response with streaming like abliterated_optimized.py
             from transformers import TextStreamer
@@ -328,25 +337,36 @@ class AbliterationProcessor:
                     self.captured = []
 
                 def on_finalized_text(self, text: str, stream_end: bool = False):
+                    print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
                     self.captured.append(text)
                     super().on_finalized_text(text, stream_end)
 
             streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
 
+            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=max_new_tokens,
-                temperature=0.7,
+                temperature=temperature,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
                 streamer=streamer
             )
 
+            print(f"DEBUG: Generation completed, output shape: {gen.shape}")
+            print(f"DEBUG: Streamer captured {len(streamer.captured)} text chunks")
+
             # Get the complete response from streamer
             response = "".join(streamer.captured).strip()
+            print(f"DEBUG: Final response length: {len(response)}")
+            print(f"DEBUG: Response preview: {response[:200]}...")
+
             return response, history + [[message, response]]
 
         except Exception as e:
+            print(f"DEBUG: Exception occurred: {str(e)}")
+            import traceback
+            traceback.print_exc()
             return f"❌ Chat error: {str(e)}", history
 
 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
@@ -472,16 +492,6 @@ def create_interface():
                 org_token.render()
                 private_repo.render()
 
-                gr.Markdown("### 💬 Chat Settings")
-                max_new_tokens = gr.Number(
-                    value=2048,
-                    label="Max New Tokens",
-                    minimum=1,
-                    maximum=8192,
-                    step=1,
-                    info="Maximum number of tokens to generate in chat responses"
-                )
-
                 process_btn = gr.Button("🚀 Start Processing", variant="primary")
                 process_output = gr.Markdown(label="Processing Result")
                 process_image = gr.Image(show_label=False)
@@ -506,25 +516,48 @@ def create_interface():
 
             # Chat tab
             with gr.TabItem("💬 Chat Test"):
-                chatbot = gr.Chatbot(
-                    label="Chat Window",
-                    height=400,
-                    type="messages"
-                )
-                msg = gr.Textbox(
-                    label="Input Message",
-                    placeholder="Enter your question...",
-                    lines=3
-                )
                 with gr.Row():
-                    send_btn = gr.Button("📤 Send", variant="primary")
-                    clear = gr.Button("🗑️ Clear Chat")
+                    with gr.Column(scale=3):
+                        chatbot = gr.Chatbot(
+                            label="Chat Window",
+                            height=400,
+                            type="messages"
+                        )
+                        msg = gr.Textbox(
+                            label="Input Message",
+                            placeholder="Enter your question...",
+                            lines=3
+                        )
+                        with gr.Row():
+                            send_btn = gr.Button("📤 Send", variant="primary")
+                            clear = gr.Button("🗑️ Clear Chat")
+
+                    with gr.Column(scale=1):
+                        gr.Markdown("### ⚙️ Chat Settings")
+                        max_new_tokens = gr.Number(
+                            value=2048,
+                            label="Max New Tokens",
+                            minimum=1,
+                            maximum=8192,
+                            step=1,
+                            info="Maximum number of tokens to generate"
+                        )
+                        temperature = gr.Slider(
+                            minimum=0.1,
+                            maximum=2.0,
+                            value=0.7,
+                            step=0.1,
+                            label="Temperature",
+                            info="Higher values = more creative, Lower values = more focused"
+                        )
 
                 gr.Markdown("""
                 **Usage Tips:**
                 - Load a model first, then you can start chatting
                 - The processed model will have reduced refusal behavior
                 - You can test various sensitive questions
+                - Adjust Max New Tokens to control response length
+                - Adjust Temperature to control creativity
                 """)
 
             # Bind events
@@ -548,18 +581,18 @@ def create_interface():
         def user(user_message, history):
             return "", history + [{"role": "user", "content": user_message}]
 
-        def bot(history, max_new_tokens):
+        def bot(history, max_new_tokens, temperature):
             if history and history[-1]["role"] == "user":
-                response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens)
+                response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
                 history.append({"role": "assistant", "content": response})
             return history
 
         msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-            bot, [chatbot, max_new_tokens], chatbot
+            bot, [chatbot, max_new_tokens, temperature], chatbot
        )
 
         send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-            bot, [chatbot, max_new_tokens], chatbot
+            bot, [chatbot, max_new_tokens, temperature], chatbot
        )
 
         clear.click(lambda: [], None, chatbot, queue=False)
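For reference, a minimal standalone sketch of the capture-and-stream pattern that these debug prints instrument: a `TextStreamer` subclass records every finalized text chunk so the full reply can be reassembled after `generate()` returns. This mirrors the `CustomStreamer` in the diff but is not the app's exact code; the model id and prompt are placeholders.

```python
# Sketch of chat()'s streaming capture, assuming a small instruct model is available.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer


class CapturingStreamer(TextStreamer):
    def __init__(self, tokenizer, **kwargs):
        super().__init__(tokenizer, **kwargs)
        self.captured = []

    def on_finalized_text(self, text: str, stream_end: bool = False):
        # Keep a copy of each finalized chunk, then let TextStreamer print it as usual.
        self.captured.append(text)
        super().on_finalized_text(text, stream_end)


model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder, not the model used by app.py
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

toks = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    add_generation_prompt=True,
    return_tensors="pt",
)
streamer = CapturingStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.generate(
    toks.to(model.device),
    max_new_tokens=64,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
    streamer=streamer,
)
response = "".join(streamer.captured).strip()
print(response)
```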
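Likewise, a stripped-down sketch of the Gradio wiring this commit reshuffles: the Max New Tokens Number and Temperature Slider sit in a side column and are passed as extra inputs to the chained `bot()` handler. The echo-style `bot()` below is a stand-in for `processor.chat()`, not the app's implementation.

```python
# Sketch of the Chat Test tab wiring with settings passed into the chained handler.
import gradio as gr


def user(user_message, history):
    return "", history + [{"role": "user", "content": user_message}]


def bot(history, max_new_tokens, temperature):
    if history and history[-1]["role"] == "user":
        reply = (f"(stub) would generate with max_new_tokens={int(max_new_tokens)}, "
                 f"temperature={temperature}")
        history.append({"role": "assistant", "content": reply})
    return history


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Chat Window", height=400, type="messages")
            msg = gr.Textbox(label="Input Message", lines=3)
            send_btn = gr.Button("📤 Send", variant="primary")
        with gr.Column(scale=1):
            max_new_tokens = gr.Number(value=2048, minimum=1, maximum=8192, step=1,
                                       label="Max New Tokens")
            temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                                    label="Temperature")

    # Submit and click both clear the textbox, append the user turn, then call bot()
    # with the chat history plus the two settings components as inputs.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, max_new_tokens, temperature], chatbot
    )
    send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, max_new_tokens, temperature], chatbot
    )

demo.launch()
```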