import torch
import cv2
import imutils
from PIL import Image

from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import MetadataCatalog

# Cap PyTorch's intra-op and inter-op CPU thread pools (sized for a 16-core node).
torch.set_num_threads(16)
torch.set_num_interop_threads(16)

from oneformer import (
    add_oneformer_config,
    add_common_config,
    add_swin_config,
    add_dinat_config,
)

from demo.defaults import DefaultPredictor
from demo.visualizer import Visualizer, ColorMode

import gradio as gr
from huggingface_hub import hf_hub_download

from utils.luminance_contrast import LuminanceContrastDetector
from utils.hue_contrast import HueContrastDetector
from utils.saturation_contrast import SaturationContrastDetector
from utils.combined_contrast import CombinedContrastDetector

KEY_DICT = {
    "ADE20K (150 classes)": "ade20k",
}

SWIN_CFG_DICT = {
    "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",
}

# Checkpoints are downloaded (and cached) from the Hugging Face Hub at import time.
SWIN_MODEL_DICT = {
    "ade20k": hf_hub_download(
        repo_id="shi-labs/oneformer_ade20k_swin_large",
        filename="250_16_swin_l_oneformer_ade20k_160k.pth",
    )
}

DINAT_CFG_DICT = {
    "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",
}

DINAT_MODEL_DICT = {
    "ade20k": hf_hub_download(
        repo_id="shi-labs/oneformer_ade20k_dinat_large",
        filename="250_16_dinat_l_oneformer_ade20k_160k.pth",
    )
}

MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT, "Swin-L": SWIN_MODEL_DICT}
CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT, "Swin-L": SWIN_CFG_DICT}
WIDTH_DICT = {"ade20k": 640}

cpu_device = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Caches keyed by (backbone, dataset); populated once by setup_modules() at startup.
PREDICTORS = {
    "DiNAT-L": {"ADE20K (150 classes)": None},
    "Swin-L": {"ADE20K (150 classes)": None},
}

METADATA = {
    "DiNAT-L": {"ADE20K (150 classes)": None},
    "Swin-L": {"ADE20K (150 classes)": None},
}

CONTRAST_DETECTORS = {
    "Luminance (WCAG)": LuminanceContrastDetector(),
    "Hue": HueContrastDetector(),
    "Saturation": SaturationContrastDetector(),
    "Combined": CombinedContrastDetector(),
}
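

# For reference, a minimal sketch of the WCAG 2.x contrast ratio that the
# "Luminance (WCAG)" option is built around: (L1 + 0.05) / (L2 + 0.05) for
# relative luminances L1 >= L2, where the default threshold of 4.5 matches the
# WCAG AA level for normal text. This helper is illustrative only and is not
# called by the app; the actual logic lives in utils.luminance_contrast.
def wcag_contrast_ratio(rgb1, rgb2):
    """Contrast ratio between two sRGB colors given as (r, g, b) tuples in 0-255."""
    def relative_luminance(rgb):
        # Linearize each sRGB channel, then weight per the WCAG definition.
        linear = []
        for channel in rgb:
            c = channel / 255.0
            linear.append(c / 12.92 if c <= 0.03928 else ((c + 0.055) / 1.055) ** 2.4)
        r, g, b = linear
        return 0.2126 * r + 0.7152 * g + 0.0722 * b

    lighter, darker = sorted((relative_luminance(rgb1), relative_luminance(rgb2)), reverse=True)
    return (lighter + 0.05) / (darker + 0.05)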


def setup_modules():
    """Build a predictor and metadata object for every supported backbone/dataset pair."""
    for dataset in ["ADE20K (150 classes)"]:
        for backbone in ["DiNAT-L", "Swin-L"]:
            cfg = setup_cfg(dataset, backbone)
            metadata = MetadataCatalog.get(
                cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
            )
            PREDICTORS[backbone][dataset] = DefaultPredictor(cfg)
            METADATA[backbone][dataset] = metadata


def setup_cfg(dataset, backbone):
    """Assemble a frozen detectron2 config for the chosen dataset and backbone."""
    cfg = get_cfg()
    # Register the extra config keys used by DeepLab, OneFormer, and both backbones.
    add_deeplab_config(cfg)
    add_common_config(cfg)
    add_swin_config(cfg)
    add_oneformer_config(cfg)
    add_dinat_config(cfg)
    dataset = KEY_DICT[dataset]
    cfg_path = CFG_DICT[backbone][dataset]
    cfg.merge_from_file(cfg_path)
    cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
    cfg.freeze()
    return cfg


def semantic_run(img, predictor, metadata):
    """Run semantic segmentation on a BGR image; return the visualization and label map."""
    # The Visualizer expects RGB, so reverse OpenCV's BGR channel order.
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "semantic")
    # "sem_seg" has shape (num_classes, H, W); argmax over classes gives per-pixel labels.
    sem_seg = predictions["sem_seg"].argmax(dim=0).to(cpu_device)
    out = visualizer.draw_sem_seg(sem_seg, alpha=0.5)
    return out, sem_seg.numpy()


def analyze_contrast(image, segmentation, contrast_method, threshold):
    """Analyze contrast between adjacent segments using the selected method."""
    detector = CONTRAST_DETECTORS[contrast_method]
    contrast_image, problem_areas, stats = detector.analyze(
        image, segmentation, threshold
    )
    return contrast_image, problem_areas, stats
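

# Detector interface, as inferred from the call site above: each entry in
# CONTRAST_DETECTORS exposes analyze(rgb_image, segmentation_mask, threshold)
# and returns (annotated RGB image, low-contrast problem areas, stats dict).
# The stats dict must provide 'problem_count'; 'min_contrast', 'max_contrast',
# and 'average_contrast' are optional and are reported only when present.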


def segment_and_analyze_contrast(path, backbone, contrast_method, threshold):
    """Segment an input image, then run the selected contrast analysis on it."""
    dataset = "ADE20K (150 classes)"
    predictor = PREDICTORS[backbone][dataset]
    metadata = METADATA[backbone][dataset]

    img = cv2.imread(path)
    if img is None:
        return None, None, "Error: Could not load image"

    # Resize to the demo width for this dataset, preserving aspect ratio.
    width = WIDTH_DICT[KEY_DICT[dataset]]
    img = imutils.resize(img, width=width)

    out, seg_mask = semantic_run(img, predictor, metadata)
    out_img = Image.fromarray(out.get_image())

    # The contrast detectors operate on RGB, while OpenCV loads BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    contrast_img, problem_areas, stats = analyze_contrast(
        img_rgb, seg_mask, contrast_method, threshold
    )

    stats_text = "### Contrast Analysis Results\n\n"
    stats_text += f"**Method:** {contrast_method}\n"
    stats_text += f"**Threshold:** {threshold:.2f}\n"
    stats_text += f"**Problem Areas:** {stats['problem_count']}\n"

    if 'min_contrast' in stats:
        stats_text += f"**Min Contrast:** {stats['min_contrast']:.2f}\n"
    if 'max_contrast' in stats:
        stats_text += f"**Max Contrast:** {stats['max_contrast']:.2f}\n"
    if 'average_contrast' in stats:
        stats_text += f"**Average Contrast:** {stats['average_contrast']:.2f}\n"

    contrast_pil = Image.fromarray(contrast_img)

    return out_img, contrast_pil, stats_text


# Build all predictors and metadata once at startup.
setup_modules()


title = "<h1 style='text-align: center'>NeuroNest: Abheek Pradhan - Contrast Model</h1>"

description = "<p style='font-size: 16px; margin: 5px; font-weight: 600; text-align: center'>" \
              "<a href='https://github.com/lolout1/sam2Contrast' target='_blank'>Github Repo</a></p>" \
              "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: 300;'>" \
              "I am developing NeuroNest, a contrast detection system designed to identify areas with insufficient contrast " \
              "for individuals with Alzheimer's disease. This tool leverages OneFormer's state-of-the-art segmentation " \
              "capabilities, trained on the ADE20K dataset, to detect indoor objects such as floors, furniture, walls, and ceilings. " \
              "By analyzing contrast ratios between adjacent segments, NeuroNest flags potential visual accessibility issues " \
              "that may trigger confusion or disorientation in elderly individuals with cognitive impairments.</p>" \
              "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: 300;'>" \
              "[Note: When running on my Linux cluster, please request a GPU node for optimal performance. " \
              "On login nodes, CUDA may not be available.]</p>"

gradio_inputs = [
    gr.Image(label="Input Image", type="filepath"),
    gr.Radio(choices=["Swin-L", "DiNAT-L"], value="Swin-L", label="Backbone"),
    gr.Radio(
        choices=["Luminance (WCAG)", "Hue", "Saturation", "Combined"],
        value="Luminance (WCAG)",
        label="Contrast Detection Method"
    ),
    gr.Slider(
        minimum=1.0,
        maximum=10.0,
        value=4.5,
        step=0.1,
        label="Contrast Threshold (segment pairs below this ratio are flagged)"
    ),
]

gradio_outputs = [
    gr.Image(type="pil", label="Segmentation Result"),
    gr.Image(type="pil", label="Contrast Analysis"),
    gr.Markdown(label="Analysis Results"),
]

examples = [
    ["examples/indoor_room.jpg", "Swin-L", "Luminance (WCAG)", 4.5],
    ["examples/living_room.jpg", "DiNAT-L", "Combined", 3.0],
]

iface = gr.Interface(
    fn=segment_and_analyze_contrast,
    inputs=gradio_inputs,
    outputs=gradio_outputs,
    examples=examples,
    examples_per_page=5,
    allow_flagging="never",
    title=title,
    description=description,
)
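
# Example of driving the pipeline without the UI (hypothetical local test;
# assumes the first image from `examples` exists on disk):
#   seg_img, contrast_img, stats_md = segment_and_analyze_contrast(
#       "examples/indoor_room.jpg", "Swin-L", "Luminance (WCAG)", 4.5
#   )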

if __name__ == "__main__":
    # 0.0.0.0 binds all interfaces; share=True also exposes a public Gradio link.
    iface.launch(server_name="0.0.0.0", share=True)