Update app.py
Browse files
app.py
CHANGED
@@ -360,39 +360,65 @@ def generate_gradcam_caption(image, processor, model, max_length=60):
|
|
360 |
Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
|
361 |
"""
|
362 |
try:
|
|
|
363 |
# Process image first
|
364 |
inputs = processor(image, return_tensors="pt")
|
365 |
|
366 |
# Check for available GPU and move model and inputs
|
367 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
368 |
model = model.to(device)
|
369 |
inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}
|
370 |
|
371 |
# Generate caption
|
|
|
372 |
with torch.no_grad():
|
373 |
output = model.generate(**inputs, max_length=max_length, num_beams=5)
|
374 |
|
375 |
# Decode the output
|
376 |
caption = processor.decode(output[0], skip_special_tokens=True)
|
|
|
377 |
|
378 |
-
#
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
if low_match:
|
390 |
-
formatted_text += f"**Low activation**:\n{low_match.strip()}"
|
391 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
392 |
return formatted_text.strip()
|
393 |
|
394 |
except Exception as e:
|
395 |
st.error(f"Error analyzing GradCAM: {str(e)}")
|
|
|
|
|
396 |
return "Error analyzing GradCAM visualization"
|
397 |
|
398 |
# Function to generate caption for original image
|
@@ -928,12 +954,18 @@ def main():
|
|
928 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
929 |
if st.session_state.blip_model_loaded and overlay:
|
930 |
with st.spinner("Analyzing GradCAM visualization..."):
|
|
|
931 |
gradcam_caption = generate_gradcam_caption(
|
932 |
overlay,
|
933 |
st.session_state.finetuned_processor,
|
934 |
st.session_state.finetuned_model
|
935 |
)
|
936 |
st.session_state.gradcam_caption = gradcam_caption
|
|
|
|
|
|
|
|
|
|
|
937 |
except Exception as e:
|
938 |
st.error(f"Error generating GradCAM: {str(e)}")
|
939 |
import traceback
|
@@ -957,6 +989,11 @@ def main():
|
|
957 |
# Image Analysis Summary section - AFTER Stage 2
|
958 |
if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
|
959 |
with st.expander("Image Analysis Summary", expanded=True):
|
|
|
|
|
|
|
|
|
|
|
960 |
# Display images and analysis in organized layout
|
961 |
col1, col2 = st.columns([1, 2])
|
962 |
|
@@ -979,6 +1016,8 @@ def main():
|
|
979 |
st.markdown("### GradCAM Analysis")
|
980 |
st.markdown(st.session_state.gradcam_caption)
|
981 |
st.markdown("---")
|
|
|
|
|
982 |
|
983 |
# LLM Analysis section - AFTER Image Analysis Summary
|
984 |
with st.expander("Stage 3: Detailed Analysis with Vision LLM", expanded=False):
|
|
|
360 |
Generate a detailed analysis of GradCAM visualization using the fine-tuned BLIP model
|
361 |
"""
|
362 |
try:
|
363 |
+
st.write("Debug: Starting GradCAM caption generation")
|
364 |
# Process image first
|
365 |
inputs = processor(image, return_tensors="pt")
|
366 |
|
367 |
# Check for available GPU and move model and inputs
|
368 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
369 |
+
st.write(f"Debug: Using device: {device}")
|
370 |
model = model.to(device)
|
371 |
inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}
|
372 |
|
373 |
# Generate caption
|
374 |
+
st.write("Debug: Generating caption...")
|
375 |
with torch.no_grad():
|
376 |
output = model.generate(**inputs, max_length=max_length, num_beams=5)
|
377 |
|
378 |
# Decode the output
|
379 |
caption = processor.decode(output[0], skip_special_tokens=True)
|
380 |
+
st.write(f"Debug: Raw caption: {caption}")
|
381 |
|
382 |
+
# Try to parse the caption based on different possible formats
|
383 |
+
try:
|
384 |
+
# Original format: sections introduced by "high activation :", "moderate activation :" and "low activation :" (note the space before the colon)
|
385 |
+
formatted_text = ""
|
386 |
+
if "high activation :" in caption:
|
387 |
+
high_match = caption.split("high activation :")[1].split("moderate")[0]
|
388 |
+
formatted_text += f"**High activation**:\n{high_match.strip()}\n\n"
|
389 |
+
|
390 |
+
if "moderate activation :" in caption:
|
391 |
+
moderate_match = caption.split("moderate activation :")[1].split("low")[0]
|
392 |
+
formatted_text += f"**Moderate activation**:\n{moderate_match.strip()}\n\n"
|
|
|
|
|
393 |
|
394 |
+
if "low activation :" in caption:
|
395 |
+
low_match = caption.split("low activation :")[1]
|
396 |
+
formatted_text += f"**Low activation**:\n{low_match.strip()}"
|
397 |
+
|
398 |
+
# If nothing was extracted using the original format, try alternative formats
|
399 |
+
if not formatted_text.strip():
|
400 |
+
st.write("Debug: Trying alternative format parsing")
|
401 |
+
|
402 |
+
# Check for newer format that might be in the Xception model
|
403 |
+
if ":" in caption:
|
404 |
+
parts = caption.split(":")
|
405 |
+
if len(parts) > 1:
|
406 |
+
formatted_text = f"**GradCAM Analysis**:\n{parts[1].strip()}"
|
407 |
+
else:
|
408 |
+
# As a fallback, just use the entire caption
|
409 |
+
formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"
|
410 |
+
except Exception as parsing_error:
|
411 |
+
st.write(f"Debug: Error parsing caption format: {str(parsing_error)}")
|
412 |
+
# Use the entire caption as is
|
413 |
+
formatted_text = f"**GradCAM Analysis**:\n{caption.strip()}"
|
414 |
+
|
415 |
+
st.write(f"Debug: Formatted caption complete. Length: {len(formatted_text)}")
|
416 |
return formatted_text.strip()
|
417 |
|
418 |
except Exception as e:
|
419 |
st.error(f"Error analyzing GradCAM: {str(e)}")
|
420 |
+
import traceback
|
421 |
+
st.error(traceback.format_exc())
|
422 |
return "Error analyzing GradCAM visualization"
|
423 |
|
424 |
# Function to generate caption for original image
|
|
|
954 |
# Generate caption for GradCAM overlay image if BLIP model is loaded
|
955 |
if st.session_state.blip_model_loaded and overlay:
|
956 |
with st.spinner("Analyzing GradCAM visualization..."):
|
957 |
+
st.write("Debug: Starting GradCAM analysis")
|
958 |
gradcam_caption = generate_gradcam_caption(
|
959 |
overlay,
|
960 |
st.session_state.finetuned_processor,
|
961 |
st.session_state.finetuned_model
|
962 |
)
|
963 |
st.session_state.gradcam_caption = gradcam_caption
|
964 |
+
st.write(f"Debug: Saved GradCAM caption to session state, length: {len(gradcam_caption) if gradcam_caption else 0}")
|
965 |
+
|
966 |
+
# Display the caption directly here as well for immediate feedback
|
967 |
+
st.markdown("### GradCAM Analysis (Direct)")
|
968 |
+
st.markdown(gradcam_caption)
|
969 |
except Exception as e:
|
970 |
st.error(f"Error generating GradCAM: {str(e)}")
|
971 |
import traceback
|
|
|
989 |
# Image Analysis Summary section - AFTER Stage 2
|
990 |
if hasattr(st.session_state, 'current_image') and (hasattr(st.session_state, 'image_caption') or hasattr(st.session_state, 'gradcam_caption')):
|
991 |
with st.expander("Image Analysis Summary", expanded=True):
|
992 |
+
st.write(f"Debug: Image caption exists: {hasattr(st.session_state, 'image_caption')}")
|
993 |
+
st.write(f"Debug: GradCAM caption exists: {hasattr(st.session_state, 'gradcam_caption')}")
|
994 |
+
if hasattr(st.session_state, 'gradcam_caption'):
|
995 |
+
st.write(f"Debug: GradCAM caption length: {len(st.session_state.gradcam_caption)}")
|
996 |
+
|
997 |
# Display images and analysis in organized layout
|
998 |
col1, col2 = st.columns([1, 2])
|
999 |
|
|
|
1016 |
st.markdown("### GradCAM Analysis")
|
1017 |
st.markdown(st.session_state.gradcam_caption)
|
1018 |
st.markdown("---")
|
1019 |
+
else:
|
1020 |
+
st.warning("GradCAM caption not found in session state.")
|
1021 |
|
1022 |
# LLM Analysis section - AFTER Image Analysis Summary
|
1023 |
with st.expander("Stage 3: Detailed Analysis with Vision LLM", expanded=False):
|