import threading

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
# Load the vision-language model and tokenizer for image description
# (GPTQ-quantized checkpoint; requires optimum and auto-gptq to be installed)
image_model_id = "Qwen/Qwen-VL-Chat-Int4"
image_tokenizer = AutoTokenizer.from_pretrained(image_model_id, trust_remote_code=True)
image_model = AutoModelForCausalLM.from_pretrained(image_model_id, device_map="cuda", trust_remote_code=True).eval()

# Load the code-generation model and tokenizer
code_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
code_tokenizer = AutoTokenizer.from_pretrained(code_model_id, trust_remote_code=True)
code_tokenizer.pad_token_id = code_tokenizer.eos_token_id  # no pad token defined; reuse EOS
code_model = AutoModelForCausalLM.from_pretrained(
code_model_id,
torch_dtype="float16",
device_map="auto"
).eval()
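
# Events used to cooperatively interrupt the two streaming loops from the UI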
stop_image_generation = threading.Event()
stop_code_generation = threading.Event()
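
# A minimal sketch of how these events could also abort decoding itself, rather than
# just the display loop (assumption: this helper is NOT wired into generate() below;
# it would be passed as stopping_criteria=StoppingCriteriaList([EventStoppingCriteria(evt)])):
from transformers import StoppingCriteria, StoppingCriteriaList

class EventStoppingCriteria(StoppingCriteria):
    def __init__(self, event: threading.Event):
        self.event = event

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        # Returning True makes generate() stop producing new tokens
        return self.event.is_set()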
def generate_response_image(uploaded_image, user_prompt, temperature, top_p, max_new_tokens):
    stop_image_generation.clear()
    if uploaded_image is None:
        yield "⚠️ Please upload an image first."
        return
    # from_list_format expects an image path, so persist the uploaded PIL image to disk
    temp_path = "/tmp/temp_image.png"
    uploaded_image.save(temp_path)
image_sys_prompt = (
"You are a helpful assistant that describes images very concisely. "
"Provide a one-sentence summary of the image in less than 15 words. "
"Use simple, direct language."
)
# Compose prompt using tokenizer's helper
query_text = image_tokenizer.from_list_format([
{"image": temp_path},
{"text": f"<|system|>\n{image_sys_prompt}\n<|end|>"},
{"text": f"<|user|>\n{user_prompt}\n<|end|>"},
{"text": "<|assistant|>"}
])
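    # query_text is now a single string; with Qwen-VL's from_list_format it looks
    # roughly like "Picture 1: <img>/tmp/temp_image.png</img>\n<|system|>..." (approximate)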
# Tokenize the input text -> get input_ids and attention_mask tensors
inputs = image_tokenizer(query_text, return_tensors="pt").to("cuda")
streamer = TextIteratorStreamer(image_tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(
**inputs,
streamer=streamer,
temperature=temperature,
top_p=top_p,
max_new_tokens=max_new_tokens,
do_sample=True,
use_cache=True,
return_dict_in_generate=True,
)
thread = threading.Thread(target=image_model.generate, kwargs=generation_kwargs)
thread.start()
response = ""
for new_text in streamer:
if stop_image_generation.is_set():
break
response += new_text
yield response
def stop_image_generation_func():
    # Signal the streaming loop to exit and clear the output textbox
    stop_image_generation.set()
    return ""
def generate_stream_local(prompt, temperature, top_p, max_new_tokens):
stop_code_generation.clear()
inputs = code_tokenizer(prompt, return_tensors="pt").to(code_model.device)
streamer = TextIteratorStreamer(code_tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(
**inputs,
streamer=streamer,
temperature=temperature,
top_p=top_p,
max_new_tokens=max_new_tokens,
do_sample=True,
use_cache=True,
return_dict_in_generate=True,
)
thread = threading.Thread(target=code_model.generate, kwargs=generation_kwargs)
thread.start()
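    # Yield raw chunks as they arrive; respond() accumulates them into the full reply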
for new_text in streamer:
if stop_code_generation.is_set():
break
yield new_text
# --- Respond logic for Gradio ---
def respond(message, temperature, top_p, max_new_tokens):
sys_prompt = (
"You are an AI coding assistant. If the user input is too vague to generate accurate code "
"(e.g., lacks programming language, method, or details), ask clarifying questions before attempting to write the code.\n"
"Think silently first and write your reasoning inside <think>...</think>. Then provide your final user-facing answer."
)
full_prompt = [
{"role": "system", "content": sys_prompt},
{"role": "user", "content": message}
]
prompt = code_tokenizer.apply_chat_template(full_prompt, tokenize=False, add_generation_prompt=True)
response = ""
for part in generate_stream_local(prompt, temperature, top_p, max_new_tokens):
response += part
yield response
# Future work should separate the reasoning process from the final answer.
# if "</think>" in response:
# yield response.split("</think>")[-1].strip()
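
# A minimal sketch of that separation, assuming the model emits one well-formed
# <think>...</think> block before the answer (hypothetical helper, not called above):
def split_reasoning(text: str):
    if "</think>" in text:
        reasoning, _, answer = text.partition("</think>")
        return reasoning.replace("<think>", "").strip(), answer.strip()
    return "", text.strip()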
def stop_code_generation_func():
    # Signal the streaming loop to exit and reset the Markdown placeholder
    stop_code_generation.set()
    return "🧾 Generated Code Output"
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # 🖼️ Image Description Tab
    with gr.Tab("🖼️ Image Description"):
gr.Markdown("## 🧠 Qwen-VL: Vision-Language Streaming Chat with Image Upload")
with gr.Row(equal_height=True):
with gr.Column(scale=1):
image_input = gr.Image(
type="pil",
label="πŸ“€ Upload Image",
height=480,
width=480
)
with gr.Column(scale=1):
prompt_input = gr.Textbox(
label="πŸ’¬ Prompt",
placeholder="e.g. Describe the image content",
value="Describe the picture",
lines=2
)
with gr.Row():
temperature = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.7,
step=0.05,
label="🎲 Temperature",
info="Controls randomness. Higher = more creative."
)
top_p = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="πŸ” Top-p",
info="Cumulative probability for nucleus sampling."
)
max_new_tokens = gr.Slider(
minimum=50,
maximum=1000,
value=500,
step=10,
label="πŸ“ Max New Tokens",
info="Maximum length of generated output."
)
                generate_btn = gr.Button("🚀 Generate Description", variant="primary")
stop_btn = gr.Button("⏹️ Stop and Clear", variant="stop")
output = gr.Textbox(
label="πŸ“„ Streaming Response",
placeholder="The model will respond here...",
lines=10,
interactive=False
)
generate_btn.click(
fn=generate_response_image,
inputs=[image_input, prompt_input, temperature, top_p, max_new_tokens],
outputs=output
)
stop_btn.click(fn=stop_image_generation_func, outputs=output)
    # 💻 Code Generator Tab
    with gr.Tab("💻 Code Generator"):
        gr.Markdown("## 🤖 DeepSeek-R1-Distill-Qwen: Code Generation from Natural Language")
with gr.Row(equal_height=True):
with gr.Column(scale=2):
code_des = gr.Textbox(
label="🧾 Describe Your Code",
placeholder="e.g. Write a Python function to reverse a string",
lines=8
)
generate_code_btn = gr.Button("🧠 Generate Code", variant="primary")
stop_code_btn = gr.Button("⏹️ Stop and Clear", variant="stop")
with gr.Column(scale=1):
temperature_code = gr.Slider(
minimum=0.1,
maximum=1.5,
value=0.7,
step=0.05,
label="🎲 Temperature",
info="Higher = more creative code."
)
top_p_code = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="πŸ” Top-p",
info="Top-p sampling filter."
)
max_new_tokens_code = gr.Slider(
minimum=50,
maximum=2048,
value=1000,
step=10,
label="πŸ“ Max New Tokens",
info="Maximum token length of generated code."
)
        output_code = gr.Markdown(
            value="🧾 Generated Code Output",
            label="🧾 Generated Code Output",
            show_label=True,
            visible=True,
            container=True,
            height=300,
            show_copy_button=True
        )
generate_code_btn.click(
fn=respond,
inputs=[code_des, temperature_code, top_p_code, max_new_tokens_code],
outputs=output_code
)
stop_code_btn.click(fn=stop_code_generation_func, outputs=output_code)
demo.launch()