"""
PetBull‑7B‑VL demo – ZeroGPU‑ready
"""
import os
import torch
import spaces # <-- NEW: import spaces for ZeroGPU
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq
from peft import PeftModel
# 0. Environment tweaks for Accelerate (unchanged)
os.environ["ACCELERATE_USE_SLOW_RETRIEVAL"] = "true"
# 1. Config
BASE_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
ADAPTER_REPO = "ColdSlim/PetBull-7B"
ADAPTER_REV = "master"
OFFLOAD_DIR = "offload"
dtype = torch.float16 # <-- use float16 for GPU
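# Note: float16 halves memory versus float32; on Ampere-or-newer GPUs,
# torch.bfloat16 would be an equally valid choice here.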
# 2. Load processor
processor = AutoProcessor.from_pretrained(BASE_MODEL, trust_remote_code=True)
# 3. Load base model on CPU; stream shards to save RAM
base = AutoModelForVision2Seq.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    low_cpu_mem_usage=True,
    device_map={"": "cpu"},
    offload_folder=OFFLOAD_DIR,
    trust_remote_code=True,
)
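# offload_folder gives Accelerate somewhere to spill weights to disk if the
# shards do not all fit in CPU RAM while loading.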
# 4. Attach LoRA adapter on CPU
model = PeftModel.from_pretrained(
    base,
    ADAPTER_REPO,
    revision=ADAPTER_REV,
    device_map={"": "cpu"},
).eval()
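# The LoRA weights stay as a separate adapter here; calling
# model.merge_and_unload() instead would fold them into the base weights,
# trading a one-off merge cost for slightly faster inference.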
# Keep track of whether the model has been moved to GPU
_model_on_gpu = False
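# ZeroGPU only attaches a GPU while a @spaces.GPU-decorated function is
# running, so the weights are moved lazily on the first request rather
# than at start-up.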
# 5. Inference helper – run on GPU when called
@spaces.GPU  # request a GPU for the duration of this call (ZeroGPU)
def generate_answer(image, question: str,
                    temperature: float = 0.7,
                    top_p: float = 0.95,
                    max_tokens: int = 256):
    global _model_on_gpu
    # Provide a placeholder image if none was uploaded
    if image is None:
        image = Image.new("RGB", (224, 224), color="white")
    # Move the model to the GPU once, on the first call
    if not _model_on_gpu:
        model.to("cuda")
        _model_on_gpu = True
    # Build a chat-formatted prompt so the processor inserts the image
    # tokens Qwen2.5-VL expects; passing the raw question alone would
    # leave the image features without matching placeholder tokens.
    messages = [{"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": question},
    ]}]
    prompt = processor.apply_chat_template(messages, tokenize=False,
                                           add_generation_prompt=True)
    inputs = processor(text=[prompt], images=[image],
                       return_tensors="pt").to("cuda")
    with torch.no_grad():
        output_ids = model.generate(**inputs,
                                    max_new_tokens=max_tokens,
                                    do_sample=True,  # required for temperature/top_p to apply
                                    temperature=temperature,
                                    top_p=top_p)
    # Drop the prompt tokens, then decode only the newly generated ones on CPU
    gen_ids = output_ids[:, inputs["input_ids"].shape[1]:].to("cpu")
    return processor.batch_decode(gen_ids, skip_special_tokens=True)[0]
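# Quick smoke test outside Gradio (hypothetical file path; needs a CUDA GPU
# or the ZeroGPU runtime):
#   from PIL import Image
#   print(generate_answer(Image.open("dog.jpg"), "What breed is this?"))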
# 6. Gradio UI (unchanged except for title)
with gr.Blocks(title="PetBull‑7B‑VL (ZeroGPU)") as demo:
    gr.Markdown(
        "## PetBull‑7B‑VL – Ask a Vet\n"
        "Upload a photo and/or type a question."
    )
    with gr.Row():
        with gr.Column():
            img_in = gr.Image(type="pil", label="Pet photo (optional)")
            txt_in = gr.Textbox(lines=3, placeholder="Describe the issue…")
            ask = gr.Button("Ask PetBull")
            temp = gr.Slider(0.1, 1.5, 0.7, label="Temperature")
            topp = gr.Slider(0.1, 1.0, 0.95, label="Top‑p")
            max_tok = gr.Slider(32, 512, 256, step=8, label="Max tokens")
        with gr.Column():
            answer = gr.Textbox(lines=12, label="Assistant", interactive=False)

    ask.click(generate_answer,
              inputs=[img_in, txt_in, temp, topp, max_tok],
              outputs=answer)

demo.queue().launch()