Update app.py
app.py
CHANGED
@@ -197,6 +197,9 @@ with st.sidebar:
         model = load_xception_model()
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+        # Explicitly move model to device
+        model = model.to(device)
+
         if model is not None:
             st.session_state.xception_model = model
             st.session_state.device = device
@@ -253,14 +256,34 @@ with st.sidebar:
     if not st.session_state.llm_model_loaded:
         if st.button("📥 Load Vision LLM", type="primary"):
             # Load LLM model
-            model, tokenizer = load_llm_model()
-            if model is not None and tokenizer is not None:
-                st.session_state.llm_model = model
-                st.session_state.tokenizer = tokenizer
-                st.session_state.llm_model_loaded = True
-                st.success("✅ Vision LLM loaded!")
-            else:
-                st.error("❌ Failed to load Vision LLM.")
+            try:
+                with st.spinner("Loading LLM vision model... This may take a few minutes. Please be patient..."):
+                    # Check for GPU
+                    has_gpu = check_gpu()
+
+                    # Load base model and tokenizer using Unsloth
+                    base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
+                    model, tokenizer = FastVisionModel.from_pretrained(
+                        base_model_id,
+                        load_in_4bit=True,
+                    )
+
+                    # Load the adapter
+                    adapter_id = "saakshigupta/deepfake-explainer-new"
+                    model = PeftModel.from_pretrained(model, adapter_id)
+
+                    # Set to inference mode
+                    FastVisionModel.for_inference(model)
+
+                    if model is not None and tokenizer is not None:
+                        st.session_state.llm_model = model
+                        st.session_state.tokenizer = tokenizer
+                        st.session_state.llm_model_loaded = True
+                        st.success("✅ Vision LLM loaded!")
+                    else:
+                        st.error("❌ Failed to load Vision LLM.")
+            except Exception as e:
+                st.error(f"Error loading LLM model: {str(e)}")
     else:
         st.success("✅ Vision LLM loaded")
 
@@ -519,33 +542,6 @@ def fix_cross_attention_mask(inputs):
     inputs['cross_attention_mask'] = new_mask
     return inputs
 
-# Load model function
-@st.cache_resource
-def load_llm_model():
-    with st.spinner("Loading LLM vision model... This may take a few minutes. Please be patient..."):
-        try:
-            # Check for GPU
-            has_gpu = check_gpu()
-
-            # Load base model and tokenizer using Unsloth
-            base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
-            model, tokenizer = FastVisionModel.from_pretrained(
-                base_model_id,
-                load_in_4bit=True,
-            )
-
-            # Load the adapter
-            adapter_id = "saakshigupta/deepfake-explainer-new"
-            model = PeftModel.from_pretrained(model, adapter_id)
-
-            # Set to inference mode
-            FastVisionModel.for_inference(model)
-
-            return model, tokenizer
-        except Exception as e:
-            st.error(f"Error loading model: {str(e)}")
-            return None, None
-
 # Analyze image function
 def analyze_image_with_llm(image, gradcam_overlay, face_box, pred_label, confidence, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""):
     # Create a prompt that includes GradCAM information
@@ -876,7 +872,8 @@ def main():
         device = st.session_state.device
         model = st.session_state.xception_model
 
-        # Ensure model is in eval mode
+        # Ensure model is in eval mode and on the correct device
+        model = model.to(device)
         model.eval()
 
         # Move tensor to device
@@ -918,7 +915,7 @@ def main():
         st.subheader("GradCAM Visualization")
         try:
             cam, overlay, comparison, detected_face_box = process_image_with_xception_gradcam(
-                image, model, device, pred_class
+                image, model.to(device), device, pred_class
             )
 
             if comparison:
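The change repeated across the hunks above is one device-placement pattern: compute a `torch.device`, move the model onto it with `.to(device)`, and keep inputs on the same device. A minimal self-contained sketch of that pattern (using a stand-in module, since `load_xception_model` is defined elsewhere in the app):

```python
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stand-in for the app's load_xception_model(); any nn.Module behaves the same
model = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(8, 2),
)
model = model.to(device)  # moves all parameters and buffers to `device`
model.eval()              # inference mode: disables dropout, fixes batch-norm stats

# The input must live on the same device, or PyTorch raises
# "Expected all tensors to be on the same device"
x = torch.randn(1, 3, 299, 299, device=device)
with torch.no_grad():
    logits = model(x)
```

Calling `.to(device)` on a model that is already on that device is a cheap no-op (`Module.to` returns the module itself), which is why the GradCAM call site can pass `model.to(device)` defensively.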
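The LLM loading that previously lived in the cached `load_llm_model()` helper is now performed inline in the button handler. Outside Streamlit, the same load path can be exercised as a standalone script; the sketch below only reuses calls and IDs that appear in the diff (4-bit loading requires a CUDA GPU with bitsandbytes available):

```python
from unsloth import FastVisionModel
from peft import PeftModel

# Base vision-language model, quantized to 4-bit at load time
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/llama-3.2-11b-vision-instruct",
    load_in_4bit=True,
)

# Apply the fine-tuned LoRA adapter on top of the base weights
model = PeftModel.from_pretrained(model, "saakshigupta/deepfake-explainer-new")

# Switch Unsloth into its optimized inference mode
FastVisionModel.for_inference(model)
```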
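Dropping `@st.cache_resource` in favor of `st.session_state` also changes the caching scope: a cached resource is shared across all sessions and reruns, while session state is per browser session and survives only reruns within it. The guard pattern the new handler uses, reduced to a runnable skeleton (`load_model` here is a hypothetical stand-in for the Unsloth/PEFT code above):

```python
import streamlit as st

def load_model():
    # Hypothetical stand-in for the real Unsloth/PEFT loading code
    return "model", "tokenizer"

if "llm_model_loaded" not in st.session_state:
    st.session_state.llm_model_loaded = False

if not st.session_state.llm_model_loaded:
    if st.button("📥 Load Vision LLM", type="primary"):
        try:
            with st.spinner("Loading LLM vision model..."):
                model, tokenizer = load_model()
                st.session_state.llm_model = model
                st.session_state.tokenizer = tokenizer
                st.session_state.llm_model_loaded = True
                st.success("✅ Vision LLM loaded!")
        except Exception as e:
            st.error(f"Error loading LLM model: {str(e)}")
else:
    st.success("✅ Vision LLM loaded")
```

Because the try/except now wraps the handler itself rather than living inside a cached function, a failed load surfaces in the UI on the rerun where the button was clicked, and the user can simply click the button again.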
|