# Hugging Face Spaces demo: Qwen-VL image description + DeepSeek code generation (Gradio app).
# NOTE(review): the Spaces page showed "Runtime error" — likely the hard-coded CUDA
# placement below failing on CPU-only hardware; verify the Space's GPU settings.
import os
import tempfile
import threading

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
# --- Vision-language model (image description tab) ---
# Qwen-VL-Chat-Int4 is a quantized chat model that accepts interleaved image/text input.
image_model_id = "Qwen/Qwen-VL-Chat-Int4"
# trust_remote_code is required: Qwen-VL ships its own tokenizer/model classes.
image_tokenizer = AutoTokenizer.from_pretrained(image_model_id, trust_remote_code=True)
# Placed on CUDA explicitly (fails on CPU-only hosts); .eval() disables dropout for inference.
image_model = AutoModelForCausalLM.from_pretrained(image_model_id, device_map="cuda", trust_remote_code=True).eval()

# --- Code-generation model (code generator tab) ---
code_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
code_tokenizer = AutoTokenizer.from_pretrained(code_model_id, trust_remote_code=True)
# No dedicated pad token on this model; reuse EOS so generate() can pad.
code_tokenizer.pad_token_id = code_tokenizer.eos_token_id
code_model = AutoModelForCausalLM.from_pretrained(
    code_model_id,
    torch_dtype="float16",  # half precision to halve GPU memory
    device_map="auto"       # let accelerate choose device placement
).eval()

# Cooperative cancellation flags: set by the Stop buttons, polled by the
# streaming loops below. They stop *consuming* tokens; the background
# generate() threads still run to completion.
stop_image_generation = threading.Event()
stop_code_generation = threading.Event()
def generate_response_image(uploaded_image, user_prompt, temperature, top_p, max_new_tokens):
    """Stream a concise Qwen-VL description of *uploaded_image*.

    Args:
        uploaded_image: PIL image from the Gradio widget (may be None if the
            user clicks Generate before uploading).
        user_prompt: free-form instruction from the user.
        temperature, top_p, max_new_tokens: sampling controls forwarded to
            ``model.generate``.

    Yields:
        The accumulated response text after each streamed fragment, so the
        Gradio textbox updates live.
    """
    stop_image_generation.clear()

    # Guard: avoid AttributeError on .save when no image was uploaded.
    if uploaded_image is None:
        yield "Please upload an image first."
        return

    # Qwen-VL's from_list_format expects an image *path*, so persist the PIL
    # image to a unique temp file. (A fixed /tmp name would be clobbered by
    # concurrent requests.)
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        temp_path = tmp.name
    uploaded_image.save(temp_path)

    image_sys_prompt = (
        "You are a helpful assistant that describes images very concisely. "
        "Provide a one-sentence summary of the image in less than 15 words. "
        "Use simple, direct language."
    )
    try:
        # Compose the multimodal prompt using the tokenizer's helper.
        query_text = image_tokenizer.from_list_format([
            {"image": temp_path},
            {"text": f"<|system|>\n{image_sys_prompt}\n<|end|>"},
            {"text": f"<|user|>\n{user_prompt}\n<|end|>"},
            {"text": "<|assistant|>"}
        ])
        # Tokenize the input text -> input_ids and attention_mask tensors on GPU.
        inputs = image_tokenizer(query_text, return_tensors="pt").to("cuda")
        streamer = TextIteratorStreamer(image_tokenizer, skip_prompt=True, skip_special_tokens=True)
        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            temperature=temperature,
            top_p=top_p,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            use_cache=True,
            return_dict_in_generate=True,
        )
        # generate() blocks, so run it on a worker thread and consume the
        # streamer here, yielding the growing response for live updates.
        thread = threading.Thread(target=image_model.generate, kwargs=generation_kwargs)
        thread.start()
        response = ""
        for new_text in streamer:
            if stop_image_generation.is_set():
                break
            response += new_text
            yield response
    finally:
        # Best-effort cleanup of the temp image (original leaked it).
        try:
            os.remove(temp_path)
        except OSError:
            pass
def stop_image_generation_func():
    """Signal the image-description stream to stop, and clear the output box."""
    stop_image_generation.set()
    return ""
def generate_stream_local(prompt, temperature, top_p, max_new_tokens):
    """Yield generated text fragments for *prompt* from the local code model.

    Runs ``generate`` on a worker thread and streams fragments as they are
    decoded; stops early (without killing the thread) when the module-level
    stop event is set.
    """
    stop_code_generation.clear()
    encoded = code_tokenizer(prompt, return_tensors="pt").to(code_model.device)
    token_stream = TextIteratorStreamer(code_tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_args = {
        "streamer": token_stream,
        "temperature": temperature,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "use_cache": True,
        "return_dict_in_generate": True,
    }
    gen_args.update(encoded)
    worker = threading.Thread(target=code_model.generate, kwargs=gen_args)
    worker.start()
    for fragment in token_stream:
        if stop_code_generation.is_set():
            break
        yield fragment
# --- Respond logic for Gradio --- | |
def respond(message, temperature, top_p, max_new_tokens):
    """Gradio callback: wrap *message* in a chat template and stream the growing answer."""
    sys_prompt = (
        "You are an AI coding assistant. If the user input is too vague to generate accurate code "
        "(e.g., lacks programming language, method, or details), ask clarifying questions before attempting to write the code.\n"
        "Think silently first and write your reasoning inside <think>...</think>. Then provide your final user-facing answer."
    )
    conversation = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": message},
    ]
    prompt = code_tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    accumulated = ""
    for chunk in generate_stream_local(prompt, temperature, top_p, max_new_tokens):
        accumulated += chunk
        yield accumulated
    # Future work should separate the reasoning process from the final answer.
    # if "</think>" in accumulated:
    #     yield accumulated.split("</think>")[-1].strip()
def stop_code_generation_func():
    """Signal the code stream to stop; reset the output panel to its placeholder text."""
    stop_code_generation.set()
    # Must match output_code's initial value so the panel reads as reset.
    return "π§Ύ Generated Code Output"
# --- Gradio UI: two tabs (image description, code generation) in one Blocks app ---
# NOTE(review): emoji in the labels below are mojibake from a bad encoding pass;
# kept byte-identical here so stop_code_generation_func's reset string still matches.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # πΌοΈ Image Description Tab
    with gr.Tab("πΌοΈ Image Description"):
        gr.Markdown("## π§ Qwen-VL: Vision-Language Streaming Chat with Image Upload")
        with gr.Row(equal_height=True):
            # Left column: the image to describe.
            with gr.Column(scale=1):
                image_input = gr.Image(
                    type="pil",  # generate_response_image expects a PIL image
                    label="π€ Upload Image",
                    height=480,
                    width=480
                )
            # Right column: prompt, sampling controls, and action buttons.
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="π¬ Prompt",
                    placeholder="e.g. Describe the image content",
                    value="Describe the picture",
                    lines=2
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        step=0.05,
                        label="π² Temperature",
                        info="Controls randomness. Higher = more creative."
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="π Top-p",
                        info="Cumulative probability for nucleus sampling."
                    )
                    max_new_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=500,
                        step=10,
                        label="π Max New Tokens",
                        info="Maximum length of generated output."
                    )
                generate_btn = gr.Button("π Generate Description", variant="primary")
                stop_btn = gr.Button("βΉοΈ Stop and Clear", variant="stop")
        # Streaming output: updated incrementally by the generator callback.
        output = gr.Textbox(
            label="π Streaming Response",
            placeholder="The model will respond here...",
            lines=10,
            interactive=False
        )
        # Wire buttons: generate streams into `output`; stop sets the event
        # and clears the box via stop_image_generation_func's return value.
        generate_btn.click(
            fn=generate_response_image,
            inputs=[image_input, prompt_input, temperature, top_p, max_new_tokens],
            outputs=output
        )
        stop_btn.click(fn=stop_image_generation_func, outputs=output)

    # π» Code Generator Tab
    with gr.Tab("π» Code Generator"):
        gr.Markdown("## π€ DeepSeek-R1-Distill-Qwen: Code Generation from Natural Language")
        with gr.Row(equal_height=True):
            # Left (wider): task description and action buttons.
            with gr.Column(scale=2):
                code_des = gr.Textbox(
                    label="π§Ύ Describe Your Code",
                    placeholder="e.g. Write a Python function to reverse a string",
                    lines=8
                )
                generate_code_btn = gr.Button("π§ Generate Code", variant="primary")
                stop_code_btn = gr.Button("βΉοΈ Stop and Clear", variant="stop")
            # Right (narrower): sampling controls for the code model.
            with gr.Column(scale=1):
                temperature_code = gr.Slider(
                    minimum=0.1,
                    maximum=1.5,
                    value=0.7,
                    step=0.05,
                    label="π² Temperature",
                    info="Higher = more creative code."
                )
                top_p_code = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="π Top-p",
                    info="Top-p sampling filter."
                )
                max_new_tokens_code = gr.Slider(
                    minimum=50,
                    maximum=2048,
                    value=1000,
                    step=10,
                    label="π Max New Tokens",
                    info="Maximum token length of generated code."
                )
        # Markdown output so generated code renders with syntax formatting;
        # its initial value doubles as the reset text returned by
        # stop_code_generation_func.
        output_code = gr.Markdown(
            value="π§Ύ Generated Code Output",
            label="π§Ύ Generated Code Output",
            show_label=True,
            visible=True,
            container=True,
            height = 300,
            show_copy_button=True
        )
        generate_code_btn.click(
            fn=respond,
            inputs=[code_des, temperature_code, top_p_code, max_new_tokens_code],
            outputs=output_code
        )
        stop_code_btn.click(fn=stop_code_generation_func, outputs=output_code)

demo.launch()