saakshigupta committed on
Commit
aa3f85c
·
verified ·
1 Parent(s): 954f59a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -45,14 +45,15 @@ def load_model():
45
  # Load processor
46
  processor = AutoProcessor.from_pretrained(base_model_id)
47
 
48
- # Configure quantization settings
49
  quantization_config = BitsAndBytesConfig(
50
  load_in_4bit=True,
51
  bnb_4bit_compute_dtype=torch.float16,
52
  bnb_4bit_use_double_quant=True,
53
  bnb_4bit_quant_type="nf4",
54
  bnb_4bit_quant_storage=torch.float16,
55
- llm_int8_skip_modules=["lm_head"]
 
56
  )
57
 
58
  # Load the pre-quantized model with unsloth settings
@@ -62,7 +63,9 @@ def load_model():
62
  quantization_config=quantization_config,
63
  torch_dtype=torch.float16,
64
  trust_remote_code=True,
65
- low_cpu_mem_usage=True
 
 
66
  )
67
 
68
  # Load adapter
 
45
  # Load processor
46
  processor = AutoProcessor.from_pretrained(base_model_id)
47
 
48
+ # Configure quantization settings for unsloth model
49
  quantization_config = BitsAndBytesConfig(
50
  load_in_4bit=True,
51
  bnb_4bit_compute_dtype=torch.float16,
52
  bnb_4bit_use_double_quant=True,
53
  bnb_4bit_quant_type="nf4",
54
  bnb_4bit_quant_storage=torch.float16,
55
+ llm_int8_skip_modules=["lm_head"],
56
+ llm_int8_enable_fp32_cpu_offload=True
57
  )
58
 
59
  # Load the pre-quantized model with unsloth settings
 
63
  quantization_config=quantization_config,
64
  torch_dtype=torch.float16,
65
  trust_remote_code=True,
66
+ low_cpu_mem_usage=True,
67
+ use_cache=True,
68
+ offload_folder="offload" # Enable disk offloading
69
  )
70
 
71
  # Load adapter