stream fix

app.py CHANGED
@@ -337,63 +337,7 @@ class AbliterationProcessor:
         except Exception as e:
             return f"❌ Chat error: {str(e)}", history
 
-
-        """Streaming chat functionality"""
-        if self.model is None or self.tokenizer is None:
-            yield "⚠️ Please load a model first!"
-            return
-
-        try:
-            # Build conversation history
-            conversation = []
-            for msg in history:
-                if isinstance(msg, dict) and "role" in msg and "content" in msg:
-                    conversation.append(msg)
-                elif isinstance(msg, list) and len(msg) == 2:
-                    conversation.append({"role": "user", "content": msg[0]})
-                    if msg[1]:
-                        conversation.append({"role": "assistant", "content": msg[1]})
-
-            # Add current message
-            conversation.append({"role": "user", "content": message})
-
-            # Generate tokens
-            toks = self.tokenizer.apply_chat_template(
-                conversation=conversation,
-                add_generation_prompt=True,
-                return_tensors="pt"
-            )
-
-            # Stream response
-            from transformers import TextStreamer
-
-            class StreamingTextStreamer(TextStreamer):
-                def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
-                    super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
-                    self.current_text = ""
-
-                def on_finalized_text(self, text: str, stream_end: bool = False):
-                    self.current_text += text
-                    yield self.current_text
-
-            streamer = StreamingTextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-            # Generate with streaming
-            gen = self.model.generate(
-                toks.to(self.model.device),
-                max_new_tokens=2048,
-                temperature=0.7,
-                do_sample=True,
-                pad_token_id=self.tokenizer.eos_token_id,
-                streamer=streamer
-            )
-
-            # Yield each chunk
-            for chunk in streamer.on_finalized_text("", False):
-                yield chunk
-
-        except Exception as e:
-            yield f"❌ Chat error: {str(e)}"
+
 
 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
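The streaming method removed above could never actually stream: `TextStreamer.on_finalized_text` is a callback that `generate()` invokes, so putting `yield` in its body turns it into a generator that `generate()` never iterates, and `generate()` itself blocks until the whole completion is done. The supported transformers pattern is `TextIteratorStreamer` with `generate()` running on a worker thread. Below is a minimal sketch of what a working method could look like, assuming the same `self.model`/`self.tokenizer` attributes as the class above and a messages-format `history`; the name `chat_stream` is illustrative, not taken from the source.

from threading import Thread

from transformers import TextIteratorStreamer

def chat_stream(self, message, history):
    """Stream decoded text as generate() produces it (sketch)."""
    if self.model is None or self.tokenizer is None:
        yield "⚠️ Please load a model first!"
        return

    # Assumes history is already a list of {"role": ..., "content": ...} dicts.
    toks = self.tokenizer.apply_chat_template(
        conversation=history + [{"role": "user", "content": message}],
        add_generation_prompt=True,
        return_tensors="pt",
    )
    # TextIteratorStreamer buffers decoded text in a queue instead of printing it.
    streamer = TextIteratorStreamer(
        self.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks until completion, so run it on a background thread
    # while this generator drains the streamer.
    Thread(
        target=self.model.generate,
        kwargs=dict(
            inputs=toks.to(self.model.device),
            max_new_tokens=2048,
            temperature=0.7,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id,
            streamer=streamer,
        ),
        daemon=True,
    ).start()

    partial = ""
    for chunk in streamer:  # yields text pieces as tokens arrive
        partial += chunk
        yield partial

The commit takes a different route: it drops real streaming entirely and simulates it in the UI layer, as the second hunk shows.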
@@ -598,13 +542,14 @@ def create_interface():
 
         def bot_stream(history):
             if history and history[-1]["role"] == "user":
-                #
-
+                # Get the full response first
+                response, _ = processor.chat(history[-1]["content"], history[:-1])
 
-                #
-
-
-
+                # Simulate streaming by yielding partial responses character by character
+                partial_response = ""
+                for char in response:
+                    partial_response += char
+                    yield history + [{"role": "assistant", "content": partial_response}]
 
         msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
             bot_stream, chatbot, chatbot
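The new `bot_stream` fetches the complete reply from `processor.chat` and then fakes streaming: a Gradio event handler that is a generator re-renders its output component on every yield, so handing back a progressively longer history produces the typewriter effect. A minimal, self-contained sketch of that wiring, assuming a recent Gradio with a messages-format `gr.Chatbot`; `fake_chat` is a hypothetical stand-in for `processor.chat`.

import gradio as gr

def fake_chat(message: str, history: list) -> str:
    # Hypothetical stand-in for processor.chat, which returns (response, history).
    return f"You said: {message}"

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()

    def user(message, history):
        # Append the user turn and clear the textbox.
        return "", history + [{"role": "user", "content": message}]

    def bot_stream(history):
        if history and history[-1]["role"] == "user":
            response = fake_chat(history[-1]["content"], history[:-1])
            partial = ""
            for char in response:
                partial += char
                # Each yield re-renders the chatbot with the longer reply.
                yield history + [{"role": "assistant", "content": partial}]

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_stream, chatbot, chatbot
    )

demo.launch()

Yielding one character at a time matches the diff; chunking several characters per yield (or adding a small time.sleep) would cut render churn on long replies.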