Brianpuz committed
Commit 1eeb055 · 1 Parent(s): 8c92c76

stream fix
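Drops the broken AbliterationProcessor.chat_stream generator and has the Gradio bot_stream callback fetch the full reply from processor.chat(), then yield it back character by character to simulate streaming in the chatbot.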

Files changed (1): app.py +8 -63
app.py CHANGED
@@ -337,63 +337,7 @@ class AbliterationProcessor:
         except Exception as e:
             return f"❌ Chat error: {str(e)}", history
 
-    def chat_stream(self, message, history):
-        """Streaming chat functionality"""
-        if self.model is None or self.tokenizer is None:
-            yield "⚠️ Please load a model first!"
-            return
-
-        try:
-            # Build conversation history
-            conversation = []
-            for msg in history:
-                if isinstance(msg, dict) and "role" in msg and "content" in msg:
-                    conversation.append(msg)
-                elif isinstance(msg, list) and len(msg) == 2:
-                    conversation.append({"role": "user", "content": msg[0]})
-                    if msg[1]:
-                        conversation.append({"role": "assistant", "content": msg[1]})
-
-            # Add current message
-            conversation.append({"role": "user", "content": message})
-
-            # Generate tokens
-            toks = self.tokenizer.apply_chat_template(
-                conversation=conversation,
-                add_generation_prompt=True,
-                return_tensors="pt"
-            )
-
-            # Stream response
-            from transformers import TextStreamer
-
-            class StreamingTextStreamer(TextStreamer):
-                def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
-                    super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
-                    self.current_text = ""
-
-                def on_finalized_text(self, text: str, stream_end: bool = False):
-                    self.current_text += text
-                    yield self.current_text
-
-            streamer = StreamingTextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-            # Generate with streaming
-            gen = self.model.generate(
-                toks.to(self.model.device),
-                max_new_tokens=2048,
-                temperature=0.7,
-                do_sample=True,
-                pad_token_id=self.tokenizer.eos_token_id,
-                streamer=streamer
-            )
-
-            # Yield each chunk
-            for chunk in streamer.on_finalized_text("", False):
-                yield chunk
-
-        except Exception as e:
-            yield f"❌ Chat error: {str(e)}"
+
 
 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
@@ -598,13 +542,14 @@ def create_interface():
 
     def bot_stream(history):
         if history and history[-1]["role"] == "user":
-            # Start with empty assistant message
-            history.append({"role": "assistant", "content": ""})
-
-            # Get streaming response
-            for response_chunk in processor.chat_stream(history[-2]["content"], history[:-2]):
-                history[-1]["content"] = response_chunk
-                yield history
+            # Get the full response first
+            response, _ = processor.chat(history[-1]["content"], history[:-1])
+
+            # Simulate streaming by yielding partial responses character by character
+            partial_response = ""
+            for char in response:
+                partial_response += char
+                yield history + [{"role": "assistant", "content": partial_response}]
 
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot_stream, chatbot, chatbot
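For reference, token-level streaming with transformers is normally built on TextIteratorStreamer: generate() runs on a worker thread while the caller iterates decoded chunks from a queue. A minimal sketch of what a working chat_stream could look like, reusing the self.model / self.tokenizer attributes and the generation settings from the removed method (this sketch is not part of the commit):

from threading import Thread
from transformers import TextIteratorStreamer

def chat_stream(self, message, history):
    """Hypothetical replacement: yield the reply as it is generated."""
    conversation = list(history) + [{"role": "user", "content": message}]
    toks = self.tokenizer.apply_chat_template(
        conversation=conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    # TextIteratorStreamer pushes decoded text into a queue that this
    # thread consumes while generate() runs in the background.
    streamer = TextIteratorStreamer(
        self.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    thread = Thread(
        target=self.model.generate,
        kwargs=dict(
            inputs=toks.to(self.model.device),
            max_new_tokens=2048,
            temperature=0.7,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id,
            streamer=streamer,
        ),
    )
    thread.start()
    partial = ""
    for chunk in streamer:  # blocks until the next decoded chunk arrives
        partial += chunk
        yield partial
    thread.join()

Compared with that, the committed approach waits for the entire chat() response before the first character appears, but it needs no extra thread and reuses the existing non-streaming code path.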
 