ColdSlim committed on
Commit
f03db3b
·
verified ·
1 Parent(s): 5aff8a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -51
app.py CHANGED
@@ -1,64 +1,92 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
9
 
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply for ``message`` given the prior ``history``.

    Builds an OpenAI-style message list (system prompt, alternating
    user/assistant turns from ``history``, then the new user message),
    sends it to the module-level ``client`` with streaming enabled, and
    yields the accumulated reply text after every received chunk.

    Args:
        message: The new user message.
        history: Previous ``(user, assistant)`` turns; empty entries are
            skipped.
        system_message: Text for the leading system message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the client.
        top_p: Nucleus-sampling threshold forwarded to the client.

    Yields:
        The reply text accumulated so far.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately NOT called `message`, so the
    # function parameter is no longer shadowed while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            # Nothing to append in a chunk without choices.
            continue
        token = chunk.choices[0].delta.content
        # `content` may be None on some chunks; concatenating None to a
        # str would raise TypeError, so treat it as an empty string.
        response += token or ""
        yield response
 
 
 
 
 
41
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
# Chat UI: `respond` is the handler; the extra widgets are passed to it
# after (message, history) in the order listed here.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)


# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()
 
 
 
 
1
  """
2
+ PetBull-7B-VL demo Space
3
+ ------------------------
4
+
5
+ • Base model : Qwen/Qwen2.5-VL-7B-Instruct
6
+ • LoRA adapter: ColdSlim/PetBull-7B (light-weight repo you just pushed)
7
+
8
+ Put this file in your Space, add a `requirements.txt` with:
9
+ transformers>=4.41.0
10
+ peft>=0.11.0
11
+ accelerate
12
+ gradio>=4.33
13
+
14
+ Then (optionally) switch the Space hardware to **GPU (shared)** in
15
+ Settings → Hardware for much faster vision-language inference.
16
  """
 
17
 
18
+ import torch, gradio as gr
19
+ from PIL import Image
20
+ from transformers import AutoProcessor, AutoModelForVision2Seq
21
+ from peft import PeftModel
22
 
23
# ---------------------------------------------------------------------
# 1  Load the base model and attach the LoRA adapter
#    (float16 on CUDA, float32 on CPU)
# ---------------------------------------------------------------------
BASE_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
ADAPTER_REPO = "ColdSlim/PetBull-7B"  # 👉 replace with your HF path if different

# Pick the device first, then the matching precision.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

# The processor is loaded from the base model repo, not the adapter repo.
processor = AutoProcessor.from_pretrained(BASE_MODEL, trust_remote_code=True)

base = AutoModelForVision2Seq.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype,
    trust_remote_code=True,
)

# Wrap the base weights with the adapter, move to the target device and
# switch to inference mode.
model = PeftModel.from_pretrained(base, ADAPTER_REPO)
model.to(device)
model.eval()
37
 
38
+ # ---------------------------------------------------------------------
39
+ # 2 Inference helper
40
+ # ---------------------------------------------------------------------
41
def generate_answer(image: "Image.Image | None",
                    question: str,
                    temperature: float = 0.7,
                    top_p: float = 0.95,
                    max_tokens: int = 512) -> str:
    """Answer a single question, optionally grounded in an image.

    The request is formatted as a one-turn chat through the processor's
    chat template, so the prompt carries the role markers and, when an
    image is supplied, the image placeholder the processor expands.
    Only the newly generated tokens are decoded; the prompt is not
    echoed back.

    Args:
        image: Optional PIL image. When ``None`` the request is sent as
            text only (no dummy image is fabricated).
        question: The user's question; surrounding whitespace is ignored.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The model's reply, or a short hint when neither an image nor a
        question was provided.
    """
    question = (question or "").strip()
    if image is None and not question:
        # Nothing to ask the model about; avoid a pointless generation.
        return "Please upload a photo or enter a question."

    # One user turn; the image entry comes first so its placeholder
    # precedes the question text in the rendered prompt.
    content = []
    if image is not None:
        content.append({"type": "image"})
    if question:
        content.append({"type": "text", "text": question})
    messages = [{"role": "user", "content": content}]

    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)

    inputs = processor(text=[prompt],
                       images=[image] if image is not None else None,
                       return_tensors="pt").to(device)

    with torch.no_grad():
        output_ids = model.generate(**inputs,
                                    # UI sliders may deliver floats.
                                    max_new_tokens=int(max_tokens),
                                    # Set explicitly so temperature/top_p
                                    # apply regardless of the checkpoint's
                                    # default generation config.
                                    do_sample=True,
                                    temperature=temperature,
                                    top_p=top_p)

    # generate() returns prompt + continuation; keep the continuation.
    prompt_len = inputs["input_ids"].shape[1]
    answer_ids = output_ids[:, prompt_len:]
    return processor.batch_decode(answer_ids,
                                  skip_special_tokens=True)[0].strip()
64
 
65
+ # ---------------------------------------------------------------------
66
+ # 3 Gradio UI
67
+ # ---------------------------------------------------------------------
68
# Two-column layout: inputs and sampling controls on the left, the
# read-only answer box on the right.
with gr.Blocks(title="PetBull-7B-VL – Ask a Vet Bot") as demo:
    gr.Markdown(
        """
        ## 🐾 PetBull-7B-VL
        Upload a photo of your pet **and/or** ask a question.
        The model will analyse the image (if provided) and give tailored advice.
        """
    )

    with gr.Row():
        # Left column: user inputs, submit button, generation settings.
        with gr.Column(scale=1):
            img_in = gr.Image(label="Pet photo (optional)", type="pil")
            txt_in = gr.Textbox(
                lines=3,
                placeholder="Describe the issue or ask a question…",
            )
            run_btn = gr.Button("Ask PetBull")
            temp_sl = gr.Slider(minimum=0.1, maximum=1.5, value=0.7,
                                label="Temperature")
            topp_sl = gr.Slider(minimum=0.1, maximum=1.0, value=0.95,
                                label="Top-p")
            max_sl = gr.Slider(minimum=32, maximum=1024, value=512,
                               step=8, label="Max new tokens")
        # Right column: model output.
        with gr.Column(scale=1):
            answer = gr.Textbox(label="Assistant", lines=12, interactive=False)

    # Input order must match generate_answer's positional parameters.
    run_btn.click(
        generate_answer,
        [img_in, txt_in, temp_sl, topp_sl, max_sl],
        answer,
    )

# Enable the request queue, then start the app.
demo.queue()
demo.launch()