Spaces:

Metal3d
/

reasoning-deepseek-qwen2

Running on Zero

App Files Files Community

Metal3d committed on Mar 20

Commit

bc76ed6

unverified ·

1 Parent(s): 2c7beb2

Use thread, all in chat function

Browse files

Files changed (1) hide show

main.py +33 -64

main.py CHANGED Viewed

@@ -1,6 +1,5 @@
-import asyncio
-import functools
 import re
 import gradio as gr
 import spaces
@@ -46,12 +45,6 @@ print(model.config)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-async def stream(streamer):
-    for txt in streamer:
-        await asyncio.sleep(0.01)
-        yield txt
 def reformat_math(text):
     """Fix MathJax delimiters to use the Gradio syntax.
@@ -64,79 +57,55 @@ def reformat_math(text):
 @spaces.GPU
-def generate(history):
     text = tokenizer.apply_chat_template(
-        history,
         tokenize=False,
         add_generation_prompt=True,
     )
-    try:
-        loop = asyncio.get_event_loop()
-    except:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
-    task = loop.run_in_executor(
-        None,
-        functools.partial(
-            model.generate,
             max_new_tokens=1024 * 128,
             streamer=streamer,
             **model_inputs,
         ),
-    )
-    return task, streamer
-async def chat(prompt, history):
-    """Respond to a chat prompt."""
-    message = {
-        "role": "user",
-        "content": prompt,
-    }
-    # build the messages list
-    history = [] if history is None else history
-    message_list = history + [message]
-    task, streamer = generate(message_list)
     buffer = ""
     reasoning = ""
     thinking = False
-    try:
-        async for new_text in stream(streamer):
-            if task.cancelled():
-                print("Cancelled")
-                break  # Stop le streaming si la tâche est annulée
-            if not thinking and "<think>" in new_text:
-                thinking = True
-                continue
-            if thinking and "</think>" in new_text:
-                thinking = False
-                continue
-            if thinking:
-                reasoning += new_text
-                heading = "# Reasoning\n\n"
-                yield "I'm thinking, please wait a moment...", heading + reasoning
-                continue
-            buffer += new_text
-            yield reformat_math(buffer), reasoning
-    except asyncio.CancelledError:
-        # this doesn't work, I don't find a way to stop generation thread
-        print("Cancelled by exception")
-        streamer.on_finalized_text("cancelled", True)
-        print("Signal sent")
-        raise
 chat_bot = gr.Chatbot(

 import re
+import threading
 import gradio as gr
 import spaces
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def reformat_math(text):
     """Fix MathJax delimiters to use the Gradio syntax.
 @spaces.GPU
+def chat(prompt, history):
+    """Respond to a chat prompt."""
+    message = {
+        "role": "user",
+        "content": prompt,
+    }
+    # build the messages list
+    history = [] if history is None else history
+    message_list = history + [message]
     text = tokenizer.apply_chat_template(
+        message_list,
         tokenize=False,
         add_generation_prompt=True,
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+    threading.Thread(
+        target=model.generate,
+        kwargs=dict(
             max_new_tokens=1024 * 128,
             streamer=streamer,
             **model_inputs,
         ),
+    ).start()
     buffer = ""
     reasoning = ""
     thinking = False
+    for new_text in streamer:
+        if not thinking and "<think>" in new_text:
+            thinking = True
+            continue
+        if thinking and "</think>" in new_text:
+            thinking = False
+            continue
+        if thinking:
+            reasoning += new_text
+            heading = "# Reasoning\n\n"
+            yield "I'm thinking, please wait a moment...", heading + reasoning
+            continue
+        buffer += new_text
+        yield reformat_math(buffer), reasoning
 chat_bot = gr.Chatbot(