Stop at eos token
app.py
CHANGED
@@ -18,7 +18,7 @@ def chat_with_model(messages):
     prompt = format_prompt(messages)
     inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)
 
-    streamer = TextIteratorStreamer(current_tokenizer, skip_prompt=True, skip_special_tokens=
+    streamer = TextIteratorStreamer(current_tokenizer, skip_prompt=True, skip_special_tokens=False)
     generation_kwargs = dict(
         **inputs,
         max_new_tokens=256,
@@ -37,7 +37,10 @@ def chat_with_model(messages):
     for new_text in streamer:
         output_text += new_text
         messages[-1]["content"] = output_text
+        if current_tokenizer.eos_token and current_tokenizer.eos_token in output_text:
+            break
         yield messages
+
 
     current_model.to("cpu")
     torch.cuda.empty_cache()
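The commit flips skip_special_tokens to False so the eos token text is visible in the streamed output, which is what the new substring check relies on. Below is a minimal sketch of how the full streaming function might look after this change; the threaded generate call, the streamer= keyword in generation_kwargs, and the output_text initialisation are assumptions about the parts of app.py the hunks do not show, and format_prompt is the app's own helper defined elsewhere in the file.

from threading import Thread

import torch
from transformers import TextIteratorStreamer

def chat_with_model(messages):
    prompt = format_prompt(messages)  # app.py helper, defined outside these hunks
    inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)

    # skip_special_tokens=False keeps the eos token text in the stream,
    # so the substring check below can see it
    streamer = TextIteratorStreamer(current_tokenizer, skip_prompt=True, skip_special_tokens=False)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=256,
        streamer=streamer,  # assumed: the hunk cuts off before this kwarg
    )
    # assumed: generation runs in a background thread while the loop consumes the streamer
    Thread(target=current_model.generate, kwargs=generation_kwargs).start()

    output_text = ""
    for new_text in streamer:
        output_text += new_text
        messages[-1]["content"] = output_text
        # stop as soon as the model emits its end-of-sequence token
        if current_tokenizer.eos_token and current_tokenizer.eos_token in output_text:
            break
        yield messages

    current_model.to("cpu")
    torch.cuda.empty_cache()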