saakshigupta committed on
Commit
954f59a
·
verified ·
1 Parent(s): 315eab1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -1
app.py CHANGED
@@ -45,12 +45,24 @@ def load_model():
45
  # Load processor
46
  processor = AutoProcessor.from_pretrained(base_model_id)
47
 
 
 
 
 
 
 
 
 
 
 
48
  # Load the pre-quantized model with unsloth settings
49
  model = AutoModelForCausalLM.from_pretrained(
50
  base_model_id,
51
  device_map="auto",
 
52
  torch_dtype=torch.float16,
53
- trust_remote_code=True
 
54
  )
55
 
56
  # Load adapter
 
45
  # Load processor
46
  processor = AutoProcessor.from_pretrained(base_model_id)
47
 
48
+ # Configure quantization settings
49
+ quantization_config = BitsAndBytesConfig(
50
+ load_in_4bit=True,
51
+ bnb_4bit_compute_dtype=torch.float16,
52
+ bnb_4bit_use_double_quant=True,
53
+ bnb_4bit_quant_type="nf4",
54
+ bnb_4bit_quant_storage=torch.float16,
55
+ llm_int8_skip_modules=["lm_head"]
56
+ )
57
+
58
  # Load the pre-quantized model with unsloth settings
59
  model = AutoModelForCausalLM.from_pretrained(
60
  base_model_id,
61
  device_map="auto",
62
+ quantization_config=quantization_config,
63
  torch_dtype=torch.float16,
64
+ trust_remote_code=True,
65
+ low_cpu_mem_usage=True
66
  )
67
 
68
  # Load adapter