saakshigupta committed (verified)
Commit 286126e · 1 Parent(s): c861c68

Update app.py

Files changed (1): app.py (+34 -37)
app.py CHANGED
@@ -197,6 +197,9 @@ with st.sidebar:
     model = load_xception_model()
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+    # Explicitly move model to device
+    model = model.to(device)
+
     if model is not None:
         st.session_state.xception_model = model
         st.session_state.device = device
@@ -253,14 +256,34 @@ with st.sidebar:
     if not st.session_state.llm_model_loaded:
         if st.button("📥 Load Vision LLM", type="primary"):
             # Load LLM model
-            model, tokenizer = load_llm_model()
-            if model is not None and tokenizer is not None:
-                st.session_state.llm_model = model
-                st.session_state.tokenizer = tokenizer
-                st.session_state.llm_model_loaded = True
-                st.success("✅ Vision LLM loaded!")
-            else:
-                st.error("❌ Failed to load Vision LLM.")
+            try:
+                with st.spinner("Loading LLM vision model... This may take a few minutes. Please be patient..."):
+                    # Check for GPU
+                    has_gpu = check_gpu()
+
+                    # Load base model and tokenizer using Unsloth
+                    base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
+                    model, tokenizer = FastVisionModel.from_pretrained(
+                        base_model_id,
+                        load_in_4bit=True,
+                    )
+
+                    # Load the adapter
+                    adapter_id = "saakshigupta/deepfake-explainer-new"
+                    model = PeftModel.from_pretrained(model, adapter_id)
+
+                    # Set to inference mode
+                    FastVisionModel.for_inference(model)
+
+                    if model is not None and tokenizer is not None:
+                        st.session_state.llm_model = model
+                        st.session_state.tokenizer = tokenizer
+                        st.session_state.llm_model_loaded = True
+                        st.success("✅ Vision LLM loaded!")
+                    else:
+                        st.error("❌ Failed to load Vision LLM.")
+            except Exception as e:
+                st.error(f"Error loading LLM model: {str(e)}")
     else:
         st.success("✅ Vision LLM loaded")
 
@@ -519,33 +542,6 @@ def fix_cross_attention_mask(inputs):
         inputs['cross_attention_mask'] = new_mask
     return inputs
 
-# Load model function
-@st.cache_resource
-def load_llm_model():
-    with st.spinner("Loading LLM vision model... This may take a few minutes. Please be patient..."):
-        try:
-            # Check for GPU
-            has_gpu = check_gpu()
-
-            # Load base model and tokenizer using Unsloth
-            base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
-            model, tokenizer = FastVisionModel.from_pretrained(
-                base_model_id,
-                load_in_4bit=True,
-            )
-
-            # Load the adapter
-            adapter_id = "saakshigupta/deepfake-explainer-new"
-            model = PeftModel.from_pretrained(model, adapter_id)
-
-            # Set to inference mode
-            FastVisionModel.for_inference(model)
-
-            return model, tokenizer
-        except Exception as e:
-            st.error(f"Error loading model: {str(e)}")
-            return None, None
-
 # Analyze image function
 def analyze_image_with_llm(image, gradcam_overlay, face_box, pred_label, confidence, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""):
     # Create a prompt that includes GradCAM information
@@ -876,7 +872,8 @@ def main():
        device = st.session_state.device
        model = st.session_state.xception_model
 
-       # Ensure model is in eval mode
+       # Ensure model is in eval mode and on the correct device
+       model = model.to(device)
        model.eval()
 
        # Move tensor to device
@@ -918,7 +915,7 @@
        st.subheader("GradCAM Visualization")
        try:
            cam, overlay, comparison, detected_face_box = process_image_with_xception_gradcam(
-               image, model, device, pred_class
+               image, model.to(device), device, pred_class
            )
 
            if comparison:
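A note on the device-placement pattern these hunks apply: for a PyTorch nn.Module, .to(device) moves parameters and buffers and returns the module, so repeating it before inference is a cheap no-op once the model already lives on the target device; input tensors, by contrast, are not moved in place and must be reassigned. A minimal sketch of the pattern, using a small stand-in module rather than the app's Xception model:

import torch
import torch.nn as nn

# Stand-in for the app's load_xception_model(); any nn.Module behaves the same.
model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# For an nn.Module, .to(device) moves parameters/buffers in place and
# returns the module itself, so calling it again later costs almost nothing.
model = model.to(device)
model.eval()  # inference mode: disables dropout and batch-norm updates

# Tensors are NOT moved in place; the result must be reassigned.
x = torch.randn(1, 3, 32, 32).to(device)
with torch.no_grad():
    y = model(x)
print(y.device, next(model.parameters()).device)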
 
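The larger change swaps the @st.cache_resource loader (deleted in the third hunk) for loading inline in the button handler, guarded by st.session_state.llm_model_loaded. Unlike st.cache_resource, which shares one cached object across reruns and sessions, session state is scoped to a single browser session, so each new session pays the load cost again. A minimal sketch of the guard pattern, with a hypothetical expensive_load() standing in for the Unsloth/PEFT loading:

import time
import streamlit as st

def expensive_load():
    # Hypothetical stand-in for FastVisionModel.from_pretrained + PeftModel.
    time.sleep(2)
    return object()

# Initialize the guard flag once per session.
if "llm_model_loaded" not in st.session_state:
    st.session_state.llm_model_loaded = False

if not st.session_state.llm_model_loaded:
    if st.button("📥 Load Vision LLM", type="primary"):
        try:
            with st.spinner("Loading model..."):
                st.session_state.llm_model = expensive_load()
                st.session_state.llm_model_loaded = True
            st.success("✅ Vision LLM loaded!")
        except Exception as e:
            st.error(f"Error loading LLM model: {e}")
else:
    st.success("✅ Vision LLM loaded")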