Spaces:

Metal3d
/

reasoning-deepseek-qwen2

Sleeping

App Files Files Community

Metal3d committed on Mar 20

Commit

181e1d1

unverified ·

1 Parent(s): 48a12f0

Remove asyncio :(

Browse files

Files changed (1) hide show

main.py +33 -46

main.py CHANGED Viewed

@@ -1,10 +1,9 @@
-import asyncio
-import functools
 import re
 import gradio as gr
 import spaces
-from transformers import AsyncTextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
 JS = """
 () => {
@@ -58,29 +57,29 @@ def reformat_math(text):
 @spaces.GPU
-def _generate(history):
     text = tokenizer.apply_chat_template(
-        history,
         tokenize=False,
         add_generation_prompt=True,
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-    streamer = AsyncTextIteratorStreamer(tokenizer, skip_special_tokens=True)
-    task = asyncio.get_running_loop().run_in_executor(
-        None,
-        functools.partial(
-            model.generate,
-            max_new_tokens=1024 * 128,
-            streamer=streamer,
             **model_inputs,
-        ),
-    )
-    return task, streamer
-async def chat(prompt, history):
     """Respond to a chat prompt."""
     message = {
         "role": "user",
@@ -92,40 +91,28 @@ async def chat(prompt, history):
     message_list = history + [message]
     # get the task and the streamer
-    task, streamer = _generate(message_list)
     buffer = ""
     reasoning = ""
     thinking = False
-    try:
-        async for new_text in streamer:
-            if task.done() or task.cancelled():
-                print("Cancelled")
-                break  # Stop le streaming si la tâche est annulée
-            if not thinking and "<think>" in new_text:
-                thinking = True
-                continue
-            if thinking and "</think>" in new_text:
-                thinking = False
-                continue
-            if thinking:
-                reasoning += new_text
-                heading = "# Reasoning\n\n"
-                yield "I'm thinking, please wait a moment...", heading + reasoning
-                continue
-            buffer += new_text
-            yield reformat_math(buffer), reasoning
-    except asyncio.CancelledError:
-        # this doesn't work, I don't find a way to stop generation thread
-        print("Cancelled")
-        streamer.on_finalized_text("cancelled", True)
-        print("Signal sent")
-        raise
 chat_bot = gr.Chatbot(

 import re
+import threading
 import gradio as gr
 import spaces
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 JS = """
 () => {
 @spaces.GPU
+def generate(messages):
     text = tokenizer.apply_chat_template(
+        messages,
         tokenize=False,
         add_generation_prompt=True,
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+    threading.Thread(
+        target=model.generate,
+        kwargs={
+            "max_new_tokens": 1024 * 128,
+            "streamer": streamer,
             **model_inputs,
+        },
+    ).start()
+    return streamer
+def chat(prompt, history):
     """Respond to a chat prompt."""
     message = {
         "role": "user",
     message_list = history + [message]
     # get the task and the streamer
+    streamer = generate(message_list)
     buffer = ""
     reasoning = ""
     thinking = False
+    for new_text in streamer:
+        if not thinking and "<think>" in new_text:
+            thinking = True
+            continue
+        if thinking and "</think>" in new_text:
+            thinking = False
+            continue
+        if thinking:
+            reasoning += new_text
+            heading = "# Reasoning\n\n"
+            yield "I'm thinking, please wait a moment...", heading + reasoning
+            continue
+        buffer += new_text
+        yield reformat_math(buffer), reasoning
 chat_bot = gr.Chatbot(