AhmedHAnwar committed · verified
Commit abd88b7 · 1 Parent(s): c3607be

Update app.py

Files changed (1):
  1. app.py +245 -0
app.py CHANGED
@@ -0,0 +1,245 @@
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import threading
import gradio as gr

# Load the vision-language model and tokenizer (Qwen-VL requires trust_remote_code).
image_model_id = "Qwen/Qwen-VL-Chat-Int4"
image_tokenizer = AutoTokenizer.from_pretrained(image_model_id, trust_remote_code=True)
image_model = AutoModelForCausalLM.from_pretrained(
    image_model_id, device_map="cuda", trust_remote_code=True
).eval()

# Load the code-generation model and tokenizer.
code_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
code_tokenizer = AutoTokenizer.from_pretrained(code_model_id, trust_remote_code=True)
code_tokenizer.pad_token_id = code_tokenizer.eos_token_id
code_model = AutoModelForCausalLM.from_pretrained(
    code_model_id,
    torch_dtype="float16",
    device_map="auto"
).eval()

# Events that signal the streaming loops to stop early. Breaking out of a loop
# only stops updating the UI; the generate() thread finishes in the background.
stop_image_generation = threading.Event()
stop_code_generation = threading.Event()

def generate_response_image(uploaded_image, user_prompt, temperature, top_p, max_new_tokens):
    stop_image_generation.clear()
    if uploaded_image is None:
        yield "Please upload an image first."
        return
    temp_path = "/tmp/temp_image.png"
    uploaded_image.save(temp_path)

    image_sys_prompt = (
        "You are a helpful assistant that describes images very concisely. "
        "Provide a one-sentence summary of the image in fewer than 15 words. "
        "Use simple, direct language."
    )

    # Compose the prompt using the tokenizer's from_list_format helper.
    query_text = image_tokenizer.from_list_format([
        {"image": temp_path},
        {"text": f"<|system|>\n{image_sys_prompt}\n<|end|>"},
        {"text": f"<|user|>\n{user_prompt}\n<|end|>"},
        {"text": "<|assistant|>"}
    ])

    # Tokenize the input text -> input_ids and attention_mask tensors.
    inputs = image_tokenizer(query_text, return_tensors="pt").to("cuda")
    streamer = TextIteratorStreamer(image_tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        temperature=temperature,
        top_p=top_p,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        use_cache=True,
        return_dict_in_generate=True,
    )

    # Generate in a worker thread so tokens can be streamed as they arrive.
    thread = threading.Thread(target=image_model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:
        if stop_image_generation.is_set():
            break
        response += new_text
        yield response

def stop_image_generation_func():
    stop_image_generation.set()
    return ""  # Clears the output textbox.

def generate_stream_local(prompt, temperature, top_p, max_new_tokens):
    stop_code_generation.clear()
    inputs = code_tokenizer(prompt, return_tensors="pt").to(code_model.device)

    streamer = TextIteratorStreamer(code_tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        temperature=temperature,
        top_p=top_p,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        use_cache=True,
        return_dict_in_generate=True,
    )

    thread = threading.Thread(target=code_model.generate, kwargs=generation_kwargs)
    thread.start()

    for new_text in streamer:
        if stop_code_generation.is_set():
            break
        yield new_text

# --- Respond logic for Gradio ---
def respond(message, temperature, top_p, max_new_tokens):
    sys_prompt = (
        "You are an AI coding assistant. If the user input is too vague to generate accurate code "
        "(e.g., lacks programming language, method, or details), ask clarifying questions before attempting to write the code.\n"
        "Think silently first and write your reasoning inside <think>...</think>. Then provide your final user-facing answer."
    )

    full_prompt = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": message}
    ]
    prompt = code_tokenizer.apply_chat_template(full_prompt, tokenize=False, add_generation_prompt=True)

    response = ""
    for part in generate_stream_local(prompt, temperature, top_p, max_new_tokens):
        response += part
        yield response
    # Future work: separate the reasoning process from the final answer, e.g.:
    # if "</think>" in response:
    #     yield response.split("</think>")[-1].strip()

def stop_code_generation_func():
    stop_code_generation.set()
    return "🧾 Generated Code Output"  # Resets the Markdown placeholder.

with gr.Blocks(theme=gr.themes.Soft()) as demo:

    # 🖼️ Image Description tab
    with gr.Tab("🖼️ Image Description"):
        gr.Markdown("## 🧠 Qwen-VL: Vision-Language Streaming Chat with Image Upload")

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                image_input = gr.Image(
                    type="pil",
                    label="📀 Upload Image",
                    height=480,
                    width=480
                )
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="💬 Prompt",
                    placeholder="e.g. Describe the image content",
                    value="Describe the picture",
                    lines=2
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        step=0.05,
                        label="🎲 Temperature",
                        info="Controls randomness. Higher = more creative."
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="🔝 Top-p",
                        info="Cumulative probability for nucleus sampling."
                    )
                    max_new_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=500,
                        step=10,
                        label="📏 Max New Tokens",
                        info="Maximum length of generated output."
                    )
                generate_btn = gr.Button("🚀 Generate Description", variant="primary")
                stop_btn = gr.Button("⏹️ Stop and Clear", variant="stop")

        output = gr.Textbox(
            label="📄 Streaming Response",
            placeholder="The model will respond here...",
            lines=10,
            interactive=False
        )

        generate_btn.click(
            fn=generate_response_image,
            inputs=[image_input, prompt_input, temperature, top_p, max_new_tokens],
            outputs=output
        )
        stop_btn.click(fn=stop_image_generation_func, outputs=output)

    # 💻 Code Generator tab
    with gr.Tab("💻 Code Generator"):
        gr.Markdown("## 🤖 DeepSeek-R1-Distill-Qwen: Code Generation from Natural Language")

        with gr.Row(equal_height=True):
            with gr.Column(scale=2):
                code_des = gr.Textbox(
                    label="🧾 Describe Your Code",
                    placeholder="e.g. Write a Python function to reverse a string",
                    lines=8
                )
                generate_code_btn = gr.Button("🧠 Generate Code", variant="primary")
                stop_code_btn = gr.Button("⏹️ Stop and Clear", variant="stop")

            with gr.Column(scale=1):
                temperature_code = gr.Slider(
                    minimum=0.1,
                    maximum=1.5,
                    value=0.7,
                    step=0.05,
                    label="🎲 Temperature",
                    info="Higher = more creative code."
                )
                top_p_code = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="🔝 Top-p",
                    info="Top-p sampling filter."
                )
                max_new_tokens_code = gr.Slider(
                    minimum=50,
                    maximum=2048,
                    value=1000,
                    step=10,
                    label="📏 Max New Tokens",
                    info="Maximum token length of generated code."
                )

        output_code = gr.Markdown(
            value="🧾 Generated Code Output",
            label="🧾 Generated Code Output",
            show_label=True,
            visible=True,
            container=True,
            height=300,
            show_copy_button=True
        )

        generate_code_btn.click(
            fn=respond,
            inputs=[code_des, temperature_code, top_p_code, max_new_tokens_code],
            outputs=output_code
        )
        stop_code_btn.click(fn=stop_code_generation_func, outputs=output_code)

demo.launch()
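A note on the image prompt: the hand-rolled <|system|> / <|end|> markers above are, as far as I can tell, not special tokens in Qwen-VL's vocabulary, so the model reads them as literal text. Qwen-VL-Chat also ships from_list_format() and chat() helpers through trust_remote_code that apply the model's native chat template. A minimal non-streaming sketch reusing the objects loaded above (the helpers come from the model repo, not from this commit):

# Minimal sketch, assuming image_model / image_tokenizer from app.py are loaded.
query = image_tokenizer.from_list_format([
    {"image": "/tmp/temp_image.png"},
    {"text": "Describe the picture"},
])
response, history = image_model.chat(image_tokenizer, query=query, history=None)
print(response)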
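On the "future work" comment in respond(): DeepSeek-R1 distills emit their chain of thought before the final answer and close it with </think>, so the split can also run on the partially streamed text. A rough sketch, where split_think is a hypothetical helper that is not part of this commit:

def split_think(streamed_text):
    """Return only the user-facing part of a partially streamed response."""
    if "</think>" in streamed_text:
        # Reasoning finished: show everything after the closing tag.
        return streamed_text.split("</think>", 1)[1].strip()
    # R1-style models reason first, so show a placeholder until </think> arrives.
    return "Thinking..."

In respond(), yielding split_think(response) instead of the bare response would then hide the reasoning from the UI.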