Spaces:

saakshigupta
/

deepfake-explainer-app

Paused

saakshigupta committed on Apr 2

Commit

954f59a

verified ·

1 Parent(s): 315eab1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,12 +45,24 @@ def load_model():
         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
         # Load the pre-quantized model with unsloth settings
         model = AutoModelForCausalLM.from_pretrained(
             base_model_id,
             device_map="auto",
             torch_dtype=torch.float16,
-            trust_remote_code=True
         )
         # Load adapter

         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
+        # Configure quantization settings
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_quant_storage=torch.float16,
+            llm_int8_skip_modules=["lm_head"]
+        )
         # Load the pre-quantized model with unsloth settings
         model = AutoModelForCausalLM.from_pretrained(
             base_model_id,
             device_map="auto",
+            quantization_config=quantization_config,
             torch_dtype=torch.float16,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
         )
         # Load adapter