Metal3d committed
Commit e0c9e04 · unverified · 1 Parent(s): d22bb98

Initial version

Files changed (3):
  1. README.md +8 -1
  2. main.py +167 -0
  3. requirements.txt +1 -0
README.md CHANGED
@@ -11,4 +11,11 @@ license: mit
  short_description: A simple chat interface to see the reasoning process
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
+
+ # A simple UI to show how a reasoning model works
+
+ This is a simple UI made with [gradio](https://gradio.app/) to show how a reasoning model works.
+
+ On the left is the classic chat interface. On the right, the model's reasoning process is displayed as it streams.
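
The two-pane split relies on the `<think>...</think>` tags that DeepSeek-R1-style models emit around their chain of thought; `main.py` below filters the stream on those tags incrementally. A minimal sketch of the idea (not part of the commit):

```python
# Sketch only: main.py does this token by token while streaming, but the
# core idea is a split on the </think> tag.
raw = "<think>The user asks 2+2; I simply add.</think>2 + 2 = 4."

thoughts, _, answer = raw.partition("</think>")
thoughts = thoughts.removeprefix("<think>")

print(thoughts)  # right pane: "The user asks 2+2; I simply add."
print(answer)    # left pane:  "2 + 2 = 4."
```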
main.py ADDED
@@ -0,0 +1,167 @@
+ import asyncio
+ import functools
+ import re
+
+ import gradio as gr
+ import spaces
+ from transformers import AsyncTextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
+
+ # Auto-scroll ".auto-scroll" elements whenever their text changes, so the
+ # reasoning pane follows the streamed output.
+ JS = """
+ () => {
+     // auto-scroll .auto-scroll elements when their text has changed
+     const observer = new MutationObserver((mutations) => {
+         mutations.forEach((mutation) => {
+             // walk up to the nearest ancestor whose inline style sets
+             // "overflow: auto" (the scrollable container)
+             let element = mutation.target;
+             while (element.parentElement !== null && element.parentElement.style.overflow !== "auto") {
+                 element = element.parentElement;
+             }
+             if (element.parentElement === null) {
+                 return;
+             }
+             element = element.parentElement;
+             element.scrollTop = element.scrollHeight;
+         });
+     });
+     document.querySelectorAll('.auto-scroll > *').forEach((elem) => {
+         console.log("observing", elem);
+         observer.observe(elem, {
+             childList: true,
+             characterData: true,
+         });
+     });
+ }
+ """
+
+ model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype="auto",
+     device_map="auto",
+ )
+ # debug output: inspect the loaded model
+ print(dir(model))
+ print(model.config)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+
+ def reformat_math(text):
+     """Fix MathJax delimiters to use the Gradio syntax.
+
+     This is a workaround to display math formulas in Gradio. For now, I haven't
+     found a way to make it work as expected with other latex_delimiters...
+     """
+     text = re.sub(r"\\\[\s*(.*?)\s*\\\]", r"$$\1$$", text, flags=re.DOTALL)
+     text = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", text, flags=re.DOTALL)
+     return text
+
+
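+ # Editor's note (illustrative, not in the original commit): the two
+ # substitutions above rewrite LaTeX delimiters into the forms declared in
+ # gr.Chatbot(latex_delimiters=...) further down, e.g.
+ #   reformat_math(r"\[ e^{i\pi} + 1 = 0 \]")  ->  "$$e^{i\pi} + 1 = 0$$"
+ #   reformat_math(r"\( x \)")                 ->  "$x$"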
+ @spaces.GPU
+ def generate(model, **kwargs):
+     """Generate text using the model."""
+     model.generate(**kwargs)
+
+
+ async def chat(prompt, history):
+     """Respond to a chat prompt."""
+     message = {
+         "role": "user",
+         "content": prompt,
+     }
+
+     history = [] if history is None else history
+     text = tokenizer.apply_chat_template(
+         history + [message],
+         tokenize=False,
+         add_generation_prompt=True,
+     )
+
+     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+     streamer = AsyncTextIteratorStreamer(tokenizer, skip_special_tokens=True)
+
+     # run the blocking generate() in a thread so the stream can be consumed
+     # asynchronously
+     task = asyncio.get_running_loop().run_in_executor(
+         None,
+         functools.partial(
+             generate,
+             model,
+             **model_inputs,
+             max_new_tokens=1024 * 128,
+             streamer=streamer,
+         ),
+     )
+
+     buffer = ""
+     reasoning = ""
+     thinking = False
+
+     try:
+         async for new_text in streamer:
+             if task.done() or task.cancelled():
+                 print("Cancelled")
+                 break  # stop streaming if the task was cancelled
+
+             # <think> ... </think> delimit the model's reasoning phase
+             if not thinking and "<think>" in new_text:
+                 thinking = True
+                 continue
+             if thinking and "</think>" in new_text:
+                 thinking = False
+                 continue
+
+             if thinking:
+                 reasoning += new_text
+                 heading = "# Reasoning\n\n"
+                 yield "I'm thinking, please wait a moment...", heading + reasoning
+                 continue
+
+             buffer += new_text
+             yield reformat_math(buffer), reasoning
+
+     except asyncio.CancelledError:
+         # this doesn't work: I haven't found a way to stop the generation thread
+         print("Cancelled")
+         streamer.on_finalized_text("cancelled", True)
+         print("Signal sent")
+         raise
+
+
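+ # Editor's note: each `yield` in chat() produces a (chat_text, reasoning_text)
+ # pair; gr.ChatInterface sends the first element to the chatbot and, through
+ # additional_outputs=[reasoning] below, the second one to the Markdown pane.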
+ chat_bot = gr.Chatbot(
+     latex_delimiters=[
+         {"left": "$$", "right": "$$", "display": True},
+         {"left": "$", "right": "$", "display": False},
+     ],
+     scale=1,
+     type="messages",
+ )
+
+ with gr.Blocks(js=JS) as demo:
+     reasoning = gr.Markdown(
+         "# Reasoning\n\nWhen the model is reasoning, its thoughts will be displayed here.",
+         label="Reasoning",
+         show_label=True,
+         container=True,
+         elem_classes="auto-scroll",
+         max_height="90vh",
+         render=False,
+     )
+     with gr.Row(equal_height=True, height="90vh"):
+         with gr.Column(scale=3):
+             gr.ChatInterface(
+                 chat,
+                 type="messages",
+                 chatbot=chat_bot,
+                 title=str(model_name),
+                 description=(
+                     f"*{model_name}* is a large language model "
+                     "trained on a mixture of instruction and "
+                     "conversational data."
+                 ),
+                 additional_outputs=[reasoning],
+             )
+
+         with gr.Column():
+             reasoning.render()
+
+
+ if __name__ == "__main__":
+     demo.queue().launch()
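
The `except asyncio.CancelledError` branch above admits that the generation thread cannot actually be stopped once started. One possible direction, sketched here as an untested assumption (the names `stop_event` and `EventStoppingCriteria` are mine, not from the commit), is transformers' stopping-criteria hook:

```python
# Untested sketch: make generate() interruptible via a threading.Event.
import threading

from transformers import StoppingCriteria, StoppingCriteriaList


class EventStoppingCriteria(StoppingCriteria):
    """Ask generate() to stop as soon as the event is set."""

    def __init__(self, event: threading.Event):
        self.event = event

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        return self.event.is_set()


stop_event = threading.Event()
criteria = StoppingCriteriaList([EventStoppingCriteria(stop_event)])

# In chat(): pass `stopping_criteria=criteria` to generate(), and call
# stop_event.set() inside the asyncio.CancelledError handler.
```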
requirements.txt ADDED
@@ -0,0 +1 @@
+ transformers
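
Note that `requirements.txt` only lists `transformers`: on a Hugging Face Space using the Gradio SDK, `gradio` (and, on ZeroGPU hardware, `spaces`) come preinstalled. Running the app outside a Space would likely need a fuller list, along the lines of (my assumption, not part of the commit):

```
gradio
spaces
transformers
torch
accelerate  # required by device_map="auto"
```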