""" PetBull‑7B‑VL demo – ZeroGPU‑ready """ import os import torch import spaces # <-- NEW: import spaces for ZeroGPU import gradio as gr from PIL import Image from transformers import AutoProcessor, AutoModelForVision2Seq from peft import PeftModel # 0. Environment tweaks for Accelerate (unchanged) os.environ["ACCELERATE_USE_SLOW_RETRIEVAL"] = "true" # 1. Config BASE_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct" ADAPTER_REPO = "ColdSlim/PetBull-7B" ADAPTER_REV = "master" OFFLOAD_DIR = "offload" dtype = torch.float16 # <-- use float16 for GPU # 2. Load processor processor = AutoProcessor.from_pretrained(BASE_MODEL, trust_remote_code=True) # 3. Load base model on CPU; stream shards to save RAM base = AutoModelForVision2Seq.from_pretrained( BASE_MODEL, torch_dtype=dtype, low_cpu_mem_usage=True, device_map={"": "cpu"}, offload_folder=OFFLOAD_DIR, trust_remote_code=True, ) # 4. Attach LoRA adapter on CPU model = PeftModel.from_pretrained( base, ADAPTER_REPO, revision=ADAPTER_REV, device_map={"": "cpu"}, ).eval() # Keep track of whether the model has been moved to GPU _model_on_gpu = False # 5. Inference helper – run on GPU when called @spaces.GPU # <-- NEW: request GPU for this function:contentReference[oaicite:3]{index=3} def generate_answer(image, question: str, temperature: float = 0.7, top_p: float = 0.95, max_tokens: int = 256): global _model_on_gpu # provide a placeholder image if none was uploaded if image is None: image = Image.new("RGB", (224, 224), color="white") # move model to GPU once if not _model_on_gpu: model.to("cuda") _model_on_gpu = True # prepare inputs on GPU inputs = processor(text=[question], images=[image], return_tensors="pt").to("cuda") with torch.no_grad(): output_ids = model.generate(**inputs, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p) # decode on CPU outputs = output_ids.to("cpu") return processor.batch_decode(outputs, skip_special_tokens=True)[0] # 6. Gradio UI (unchanged except for title) with gr.Blocks(title="PetBull‑7B‑VL (ZeroGPU)") as demo: gr.Markdown( "## PetBull‑7B‑VL – Ask a Vet\n" "Upload a photo and/or type a question." ) with gr.Row(): with gr.Column(): img_in = gr.Image(type="pil", label="Pet photo (optional)") txt_in = gr.Textbox(lines=3, placeholder="Describe the issue…") ask = gr.Button("Ask PetBull") temp = gr.Slider(0.1, 1.5, 0.7, label="Temperature") topp = gr.Slider(0.1, 1.0, 0.95, label="Top‑p") max_tok = gr.Slider(32, 512, 256, step=8, label="Max tokens") with gr.Column(): answer = gr.Textbox(lines=12, label="Assistant", interactive=False) ask.click(generate_answer, inputs=[img_in, txt_in, temp, topp, max_tok], outputs=answer) demo.queue().launch()