Brianpuz committed
Commit 005d861 · 1 Parent(s): 3d9243c
Files changed (1)
  1. app.py +16 -27
app.py CHANGED
@@ -289,7 +289,7 @@ class AbliterationProcessor:
             layer.mlp.down_proj.weight.data = modified_weight

     def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
-        """Chat functionality"""
+        """Chat functionality with streaming output"""
         print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")

         if self.model is None or self.tokenizer is None:
@@ -325,25 +325,14 @@ class AbliterationProcessor:
             )
             print(f"DEBUG: Input tokens shape: {toks.shape}")

-            # Generate response with streaming like abliterated_optimized.py
-            from transformers import TextStreamer
-
-            # Create a custom streamer that captures all output
-            captured_output = []
-
-            class CustomStreamer(TextStreamer):
-                def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
-                    super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
-                    self.captured = []
-
-                def on_finalized_text(self, text: str, stream_end: bool = False):
-                    print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
-                    self.captured.append(text)
-                    super().on_finalized_text(text, stream_end)
+            # Generate response with streaming
+            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")

-            streamer = CustomStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
+            # Use TextStreamer to show output in real-time
+            from transformers import TextStreamer
+            streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

-            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")
+            # Generate with streamer to show output in console
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=max_new_tokens,
@@ -353,21 +342,20 @@ class AbliterationProcessor:
                 streamer=streamer
             )

-            print(f"DEBUG: Generation completed, output shape: {gen.shape}")
-            print(f"DEBUG: Streamer captured {len(streamer.captured)} text chunks")
-
-            # Get the complete response from streamer
-            response = "".join(streamer.captured).strip()
-            print(f"DEBUG: Final response length: {len(response)}")
-            print(f"DEBUG: Response preview: {response[:200]}...")
+            # Decode the generated tokens
+            generated_text = self.tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
+            print(f"DEBUG: Generated text length: {len(generated_text)}")
+            print(f"DEBUG: Generated text preview: {generated_text[:200]}...")

-            return response, history + [[message, response]]
+            return generated_text, history + [[message, generated_text]]

         except Exception as e:
             print(f"DEBUG: Exception occurred: {str(e)}")
             import traceback
             traceback.print_exc()
             return f"❌ Chat error: {str(e)}", history
+
+

 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
@@ -577,12 +565,13 @@ def create_interface():
         outputs=[process_output, process_image]
     )

-    # Chat functionality
+    # Chat functionality with streaming
     def user(user_message, history):
         return "", history + [{"role": "user", "content": user_message}]

     def bot(history, max_new_tokens, temperature):
         if history and history[-1]["role"] == "user":
+            # Get complete response first
             response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
             history.append({"role": "assistant", "content": response})
             return history
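
For context on what the new chat() path does: the commit replaces the chunk-capturing CustomStreamer with a plain TextStreamer (console-only streaming) and takes the response from an explicit decode of the tokens generated after the prompt. Below is a minimal, self-contained sketch of that pattern, assuming a generic chat model; the model id and prompt are placeholders, not values from this Space.

# Minimal sketch of the generate-with-TextStreamer, decode-afterwards pattern.
# The model id and prompt are placeholders, not taken from this repo.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder; any chat model with a chat template works
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages = [{"role": "user", "content": "Hello!"}]
toks = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")

# TextStreamer only prints decoded text to stdout as tokens arrive; it returns nothing.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

gen = model.generate(
    toks.to(model.device),
    max_new_tokens=256,
    temperature=0.7,
    do_sample=True,
    streamer=streamer,
)

# The reply handed back to the UI comes from decoding only the tokens after the prompt.
response = tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
print(f"Response length: {len(response)}")

The streamer is purely a console mirror here; the returned value comes from tokenizer.decode on the slice past the prompt, so the chat response no longer depends on the streamer having captured every chunk.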
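The updated create_interface() handlers assume a messages-format chat history (dicts with "role"/"content" keys). A hedged sketch of how such user/bot callbacks are typically wired to a Gradio Chatbot follows; the respond() stub and the component layout are illustrative assumptions, not code from app.py.

# Illustrative wiring for messages-format chat handlers in Gradio Blocks.
# respond() stands in for processor.chat(); the layout is an assumption, not app.py code.
import gradio as gr

def respond(message, history):
    # Placeholder for the real model call (e.g. processor.chat(message, history, ...)).
    return f"Echo: {message}"

def user(user_message, history):
    # Append the user turn and clear the textbox.
    return "", history + [{"role": "user", "content": user_message}]

def bot(history):
    # Answer the most recent user turn with an assistant message.
    if history and history[-1]["role"] == "user":
        reply = respond(history[-1]["content"], history[:-1])
        history.append({"role": "assistant", "content": reply})
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Message")
    # Show the user turn first, then fill in the assistant reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot]).then(bot, chatbot, chatbot)

if __name__ == "__main__":
    demo.launch()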