ColdSlim committed on
Commit
97a1db9
·
verified ·
1 Parent(s): a142860

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -19,6 +19,7 @@ import torch, gradio as gr
19
  from PIL import Image
20
  from transformers import AutoProcessor, AutoModelForVision2Seq
21
  from peft import PeftModel
 
22
 
23
  # ---------------------------------------------------------------------
24
  # 1 Load base + LoRA (≈ 12 GB VRAM in bf16; falls back to CPU if needed)
@@ -31,8 +32,16 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
31
  dtype = torch.float16 if device == "cuda" else torch.float32
32
 
33
  processor = AutoProcessor.from_pretrained(BASE_MODEL, trust_remote_code=True)
 
 
 
 
 
 
 
 
34
  base = AutoModelForVision2Seq.from_pretrained(
35
- BASE_MODEL, torch_dtype=dtype, trust_remote_code=True)
36
  model = PeftModel.from_pretrained(base, ADAPTER_REPO, revision=ADAPTER_REV)
37
  model.to(device).eval()
38
 
 
19
  from PIL import Image
20
  from transformers import AutoProcessor, AutoModelForVision2Seq
21
  from peft import PeftModel
22
+ from transformers import BitsAndBytesConfig
23
 
24
  # ---------------------------------------------------------------------
25
  # 1 Load base + LoRA (≈ 12 GB VRAM in bf16; falls back to CPU if needed)
 
32
  dtype = torch.float16 if device == "cuda" else torch.float32
33
 
34
  processor = AutoProcessor.from_pretrained(BASE_MODEL, trust_remote_code=True)
35
+
36
+ quant_cfg = BitsAndBytesConfig(
37
+ load_in_4bit=True,
38
+ bnb_4bit_compute_dtype=torch.float16,
39
+ bnb_4bit_use_double_quant=True,
40
+ bnb_4bit_quant_type="nf4"
41
+ )
42
+
43
  base = AutoModelForVision2Seq.from_pretrained(
44
+ BASE_MODEL, device_map="auto", torch_dtype=torch.float16, quantization_config=quant_cfg, trust_remote_code=True)
45
  model = PeftModel.from_pretrained(base, ADAPTER_REPO, revision=ADAPTER_REV)
46
  model.to(device).eval()
47