app.py
CHANGED
@@ -289,7 +289,7 @@ class AbliterationProcessor:
             layer.mlp.down_proj.weight.data = modified_weight

     def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
-        """Chat functionality"""
+        """Chat functionality with streaming output"""
         print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")

         if self.model is None or self.tokenizer is None:
@@ -325,25 +325,14 @@ class AbliterationProcessor:
             )
             print(f"DEBUG: Input tokens shape: {toks.shape}")

-            # Generate response with streaming
-
-
-            # Create a custom streamer that captures all output
-            captured_output = []
-
-            class CustomStreamer(TextStreamer):
-                def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
-                    super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
-                    self.captured = []
-
-                def on_finalized_text(self, text: str, stream_end: bool = False):
-                    print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
-                    self.captured.append(text)
-                    super().on_finalized_text(text, stream_end)
+            # Generate response with streaming
+            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")

-
+            # Use TextStreamer to show output in real-time
+            from transformers import TextStreamer
+            streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

-
+            # Generate with streamer to show output in console
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=max_new_tokens,
@@ -353,21 +342,20 @@ class AbliterationProcessor:
                 streamer=streamer
             )

-
-
-
-
-            response = "".join(streamer.captured).strip()
-            print(f"DEBUG: Final response length: {len(response)}")
-            print(f"DEBUG: Response preview: {response[:200]}...")
+            # Decode the generated tokens
+            generated_text = self.tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
+            print(f"DEBUG: Generated text length: {len(generated_text)}")
+            print(f"DEBUG: Generated text preview: {generated_text[:200]}...")

-            return
+            return generated_text, history + [[message, generated_text]]

         except Exception as e:
             print(f"DEBUG: Exception occurred: {str(e)}")
             import traceback
             traceback.print_exc()
             return f"❌ Chat error: {str(e)}", history
+
+

 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
@@ -577,12 +565,13 @@ def create_interface():
         outputs=[process_output, process_image]
     )

-    # Chat functionality
+    # Chat functionality with streaming
     def user(user_message, history):
         return "", history + [{"role": "user", "content": user_message}]

     def bot(history, max_new_tokens, temperature):
         if history and history[-1]["role"] == "user":
+            # Get complete response first
             response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
             history.append({"role": "assistant", "content": response})
         return history
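For reference, a minimal standalone sketch of the pattern the new chat() code relies on: TextStreamer prints tokens to the console while generate() runs, and the reply is recovered afterwards by slicing off the prompt tokens and decoding only the newly generated ones. The model id and prompt below are illustrative assumptions, not taken from app.py.

# Sketch of the streaming-then-decode pattern used in chat() above.
# The model id is a hypothetical small instruct model; any causal LM works the same way.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # assumption, for illustration only
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# TextStreamer prints tokens to stdout as they are produced;
# skip_prompt=True keeps the echoed prompt out of the console output.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

toks = tokenizer("Hello, how are you?", return_tensors="pt").input_ids
gen = model.generate(
    toks.to(model.device),
    max_new_tokens=64,
    do_sample=True,
    temperature=0.7,
    streamer=streamer,
)

# The streamer only prints; the returned tensor still holds prompt + completion,
# so slice off the prompt tokens before decoding the reply.
response = tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
print(response)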
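The user()/bot() handlers changed in the last hunk follow Gradio's usual two-step chat wiring: user() appends the message and clears the textbox, then bot() calls the model and appends the assistant reply. A minimal sketch of that wiring is below, assuming Gradio 4+ with a messages-format Chatbot; the component names (msg, chatbot, tokens_slider, temp_slider) and the fake_chat stand-in are assumptions, not taken from app.py.

# Sketch of the two-step Gradio chat wiring used by the handlers above.
import gradio as gr

def fake_chat(message, history, max_new_tokens, temperature):
    # Stand-in for processor.chat(); returns (response, history).
    return f"echo: {message}", history

def user(user_message, history):
    # Step 1: clear the textbox and add the user turn to the history.
    return "", history + [{"role": "user", "content": user_message}]

def bot(history, max_new_tokens, temperature):
    # Step 2: generate the full assistant reply and append it.
    if history and history[-1]["role"] == "user":
        response, _ = fake_chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
        history.append({"role": "assistant", "content": response})
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    tokens_slider = gr.Slider(1, 4096, value=2048, label="max_new_tokens")
    temp_slider = gr.Slider(0.0, 2.0, value=0.7, label="temperature")
    # Submit first updates the history with the user turn, then runs the model.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, tokens_slider, temp_slider], chatbot
    )

demo.launch()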