""" | |
PetBull-7B-VL demo Space | |
------------------------ | |
• Base model : Qwen/Qwen2.5-VL-7B-Instruct | |
• LoRA adapter: ColdSlim/PetBull-7B (light-weight repo you just pushed) | |
Put this file in your Space, add a `requirements.txt` with: | |
transformers>=4.41.0 | |
peft>=0.11.0 | |
accelerate | |
gradio>=4.33 | |
Then (optionally) switch the Space hardware to **GPU (shared)** in | |
Settings → Hardware for much faster vision-language inference. | |
""" | |
import torch
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq
from peft import PeftModel
# ---------------------------------------------------------------------
# 1 Load base + LoRA (fp16 on GPU; falls back to fp32 on CPU)
# ---------------------------------------------------------------------
BASE_MODEL   = "Qwen/Qwen2.5-VL-7B-Instruct"
ADAPTER_REPO = "ColdSlim/PetBull-7B"  # 👉 replace with your HF path if different
ADAPTER_REV  = "main"                 # default branch on the Hugging Face Hub

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype  = torch.float16 if device == "cuda" else torch.float32

processor = AutoProcessor.from_pretrained(BASE_MODEL, trust_remote_code=True)
base = AutoModelForVision2Seq.from_pretrained(
    BASE_MODEL, torch_dtype=dtype, trust_remote_code=True)
model = PeftModel.from_pretrained(base, ADAPTER_REPO, revision=ADAPTER_REV)
model.to(device).eval()
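
# Optional: merge the LoRA weights into the base model for slightly faster
# inference. A sketch using peft's merge_and_unload(); skip this if you want
# to keep the adapter separately swappable:
#   model = model.merge_and_unload()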
# ---------------------------------------------------------------------
# 2 Inference helper
# ---------------------------------------------------------------------
def generate_answer(image: Image.Image | None,
                    question: str,
                    temperature: float = 0.7,
                    top_p: float = 0.95,
                    max_tokens: int = 512) -> str:
    """
    Runs one-shot VQA chat. Image is optional; if None we still satisfy the
    prompt format expected by Qwen2.5-VL by inserting a blank white image.
    """
    if image is None:
        image = Image.new("RGB", (224, 224), color="white")

    # Build the chat-formatted prompt the Qwen2.5-VL processor expects.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": question},
        ],
    }]
    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[prompt],
                       images=[image],
                       return_tensors="pt").to(device)

    with torch.no_grad():
        output_ids = model.generate(**inputs,
                                    max_new_tokens=max_tokens,
                                    do_sample=True,  # required for temperature/top_p to take effect
                                    temperature=temperature,
                                    top_p=top_p)

    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
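
# Quick local smoke test (hypothetical file name "sample_pet.jpg"):
#   print(generate_answer(Image.open("sample_pet.jpg"), "What breed is this?"))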
# ---------------------------------------------------------------------
# 3 Gradio UI
# ---------------------------------------------------------------------
with gr.Blocks(title="PetBull-7B-VL – Ask a Vet Bot") as demo:
    gr.Markdown(
        """
        ## 🐾 PetBull-7B-VL
        Upload a photo of your pet **and/or** ask a question.
        The model will analyse the image (if provided) and give tailored advice.
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            img_in  = gr.Image(type="pil", label="Pet photo (optional)")
            txt_in  = gr.Textbox(lines=3, placeholder="Describe the issue or ask a question…")
            run_btn = gr.Button("Ask PetBull")
            temp_sl = gr.Slider(0.1, 1.5, value=0.7,  label="Temperature")
            topp_sl = gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
            max_sl  = gr.Slider(32, 1024, value=512, step=8, label="Max new tokens")
        with gr.Column(scale=1):
            answer = gr.Textbox(lines=12, label="Assistant", interactive=False)

    run_btn.click(fn=generate_answer,
                  inputs=[img_in, txt_in, temp_sl, topp_sl, max_sl],
                  outputs=answer)

demo.queue().launch()
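
# Note: on ZeroGPU ("Running on Zero") Space hardware, the usual pattern is to
# `import spaces` and decorate the inference function with @spaces.GPU so a GPU
# is attached per call; that decorator is an assumption, not shown above.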