cleaning repo
- .gitignore +0 -1
- gradio_app.py +0 -222
- gradio_combine.py +0 -384
- gradio_contrast.py +0 -455
- gradio_gpu.py +0 -234
.gitignore
CHANGED
@@ -9,7 +9,6 @@ build/
 dist/
 *.egg-info/
 .env
-.claude/
 oneformer/modeling/pixel_decoder/ops/build/
 oneformer/modeling/pixel_decoder/ops/dist/
 oneformer/modeling/pixel_decoder/ops/*.egg-info/
gradio_app.py
DELETED
@@ -1,222 +0,0 @@
import torch

print("Installed the dependencies!")

import numpy as np
from PIL import Image
import cv2
import imutils
import os
import sys
import time
from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import MetadataCatalog
torch.set_num_threads(16)  # Use 16 CPU threads
torch.set_num_interop_threads(16)  # Inter-op parallelism
from oneformer import (
    add_oneformer_config,
    add_common_config,
    add_swin_config,
    add_dinat_config,
)

from demo.defaults import DefaultPredictor
from demo.visualizer import Visualizer, ColorMode

import gradio as gr
from huggingface_hub import hf_hub_download

KEY_DICT = {"Cityscapes (19 classes)": "cityscapes",
            "COCO (133 classes)": "coco",
            "ADE20K (150 classes)": "ade20k",}

SWIN_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_swin_large_IN21k_384_bs16_90k.yaml",
                 "coco": "configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml",
                 "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",}

SWIN_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_swin_large",
                                                 filename="250_16_swin_l_oneformer_cityscapes_90k.pth"),
                   "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_swin_large",
                                           filename="150_16_swin_l_oneformer_coco_100ep.pth"),
                   "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_swin_large",
                                             filename="250_16_swin_l_oneformer_ade20k_160k.pth")
                   }

DINAT_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_dinat_large_bs16_90k.yaml",
                  "coco": "configs/coco/oneformer_dinat_large_bs16_100ep.yaml",
                  "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",}

DINAT_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_dinat_large",
                                                  filename="250_16_dinat_l_oneformer_cityscapes_90k.pth"),
                    "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_dinat_large",
                                            filename="150_16_dinat_l_oneformer_coco_100ep.pth"),
                    "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_dinat_large",
                                              filename="250_16_dinat_l_oneformer_ade20k_160k.pth")
                    }

MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT,
              "Swin-L": SWIN_MODEL_DICT}

CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT,
            "Swin-L": SWIN_CFG_DICT}

WIDTH_DICT = {"cityscapes": 512,
              "coco": 512,
              "ade20k": 640}

cpu_device = torch.device("cpu")

PREDICTORS = {
    "DiNAT-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    },
    "Swin-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    }
}

METADATA = {
    "DiNAT-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    },
    "Swin-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    }
}

def setup_modules():
    for dataset in ["Cityscapes (19 classes)", "COCO (133 classes)", "ADE20K (150 classes)"]:
        for backbone in ["DiNAT-L", "Swin-L"]:
            cfg = setup_cfg(dataset, backbone)
            metadata = MetadataCatalog.get(
                cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
            )
            if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST_PANOPTIC[0]:
                from cityscapesscripts.helpers.labels import labels
                stuff_colors = [k.color for k in labels if k.trainId != 255]
                metadata = metadata.set(stuff_colors=stuff_colors)
            PREDICTORS[backbone][dataset] = DefaultPredictor(cfg)
            METADATA[backbone][dataset] = metadata

def setup_cfg(dataset, backbone):
    # load config from file and command-line arguments
    cfg = get_cfg()
    add_deeplab_config(cfg)
    add_common_config(cfg)
    add_swin_config(cfg)
    add_oneformer_config(cfg)
    add_dinat_config(cfg)
    dataset = KEY_DICT[dataset]
    cfg_path = CFG_DICT[backbone][dataset]
    cfg.merge_from_file(cfg_path)
    if torch.cuda.is_available():
        cfg.MODEL.DEVICE = 'cuda'
    else:
        cfg.MODEL.DEVICE = 'cpu'
    cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
    cfg.freeze()
    return cfg

# def setup_modules(dataset, backbone):
#     cfg = setup_cfg(dataset, backbone)
#     predictor = DefaultPredictor(cfg)
#     # predictor = PREDICTORS[backbone][dataset]
#     metadata = MetadataCatalog.get(
#         cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
#     )
#     if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST_PANOPTIC[0]:
#         from cityscapesscripts.helpers.labels import labels
#         stuff_colors = [k.color for k in labels if k.trainId != 255]
#         metadata = metadata.set(stuff_colors=stuff_colors)

#     return predictor, metadata

def panoptic_run(img, predictor, metadata):
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "panoptic")
    panoptic_seg, segments_info = predictions["panoptic_seg"]
    out = visualizer.draw_panoptic_seg_predictions(
        panoptic_seg.to(cpu_device), segments_info, alpha=0.5
    )
    visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
    out_map = visualizer_map.draw_panoptic_seg_predictions(
        panoptic_seg.to(cpu_device), segments_info, alpha=1, is_text=False
    )
    return out, out_map

def instance_run(img, predictor, metadata):
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "instance")
    instances = predictions["instances"].to(cpu_device)
    out = visualizer.draw_instance_predictions(predictions=instances, alpha=0.5)
    visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
    out_map = visualizer_map.draw_instance_predictions(predictions=instances, alpha=1, is_text=False)
    return out, out_map

def semantic_run(img, predictor, metadata):
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "semantic")
    out = visualizer.draw_sem_seg(
        predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
    )
    visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
    out_map = visualizer_map.draw_sem_seg(
        predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=1, is_text=False
    )
    return out, out_map

TASK_INFER = {"the task is panoptic": panoptic_run, "the task is instance": instance_run, "the task is semantic": semantic_run}

def segment(path, task, dataset, backbone):
    # predictor, metadata = setup_modules(dataset, backbone)
    predictor = PREDICTORS[backbone][dataset]
    metadata = METADATA[backbone][dataset]
    img = cv2.imread(path)
    width = WIDTH_DICT[KEY_DICT[dataset]]
    img = imutils.resize(img, width=width)
    out, out_map = TASK_INFER[task](img, predictor, metadata)
    out = Image.fromarray(out.get_image())
    out_map = Image.fromarray(out_map.get_image())
    return out, out_map

title = "<h1 style='text-align: center'>OneFormer:DIEGO MENTORIA MILIONÁRIA - APP 1</h1>"
# style='margin-bottom: -10px;
description = "<p style='font-size: 14px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://praeclarumjj3.github.io/' style='text-decoration:none' target='_blank'>Jitesh Jain, </a> <a href='https://chrisjuniorli.github.io/' style='text-decoration:none' target='_blank'>Jiachen Li<sup>*</sup>, </a> <a href='https://www.linkedin.com/in/mtchiu/' style='text-decoration:none' target='_blank'>MangTik Chiu<sup>*</sup>, </a> <a href='https://alihassanijr.com/' style='text-decoration:none' target='_blank'>Ali Hassani, </a> <a href='https://www.linkedin.com/in/nukich74/' style='text-decoration:none' target='_blank'>Nikita Orlov, </a> <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Humphrey Shi</a></p>" \
            + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/oneformer/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/2211.06220' target='_blank'>ArXiv Paper</a> | <a href='https://github.com/SHI-Labs/OneFormer' target='_blank'>Github Repo</a></p>" \
            + "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'> \
            OneFormer is the first multi-task universal image segmentation framework based on transformers. Our single OneFormer model achieves state-of-the-art performance across all three segmentation tasks with a single task-conditioned joint training process. OneFormer uses a task token to condition the model on the task in focus, making our architecture task-guided for training, and task-dynamic for inference, all with a single model. We believe OneFormer is a significant step towards making image segmentation more universal and accessible.\
            </p>" \
            + "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: w300;'> [Note: Inference on CPU may take upto 2 minutes. On a single RTX A6000 GPU, OneFormer is able to inference at more than 15 FPS.]</p>"

setup_modules()

gradio_inputs = [gr.Image(label="Input Image", type="filepath"),
                 gr.Radio(choices=["the task is panoptic", "the task is instance", "the task is semantic"], type="value", value="the task is panoptic", label="Task Token Input"),
                 gr.Radio(choices=["COCO (133 classes)", "Cityscapes (19 classes)", "ADE20K (150 classes)"], type="value", value="COCO (133 classes)", label="Model"),
                 gr.Radio(choices=["DiNAT-L", "Swin-L"], type="value", value="DiNAT-L", label="Backbone"),
                 ]
gradio_outputs = [gr.Image(type="pil", label="Segmentation Overlay"), gr.Image(type="pil", label="Segmentation Map")]


examples = [["examples/coco.jpeg", "the task is panoptic", "COCO (133 classes)", "DiNAT-L"],
            ["examples/cityscapes.png", "the task is panoptic", "Cityscapes (19 classes)", "DiNAT-L"],
            ["examples/ade20k.jpeg", "the task is panoptic", "ADE20K (150 classes)", "DiNAT-L"]]


iface = gr.Interface(fn=segment, inputs=gradio_inputs,
                     outputs=gradio_outputs,
                     examples_per_page=5,
                     allow_flagging="never",
                     examples=examples, title=title,
                     description=description)

iface.launch(server_name="0.0.0.0", share=True)
gradio_combine.py
DELETED
@@ -1,384 +0,0 @@
import torch
import numpy as np
from PIL import Image
import cv2
import imutils
import os
import sys
import time
import colorsys
from scipy import ndimage
import gradio as gr
from huggingface_hub import hf_hub_download

# Detectron2 imports
from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor as DetectronPredictor
from detectron2 import model_zoo

# OneFormer imports
from oneformer import (
    add_oneformer_config,
    add_common_config,
    add_swin_config,
    add_dinat_config,
)
from demo.defaults import DefaultPredictor as OneFormerPredictor
from demo.visualizer import Visualizer, ColorMode

# NeuroNest contrast detection imports
from utils.contrast_detector import ContrastDetector
from utils.luminance_contrast import LuminanceContrastDetector
from utils.hue_contrast import HueContrastDetector
from utils.saturation_contrast import SaturationContrastDetector
from utils.combined_contrast import CombinedContrastDetector

# Set threads for CPU optimization
torch.set_num_threads(4)

########################################
# GLOBAL CONFIGURATIONS
########################################

# OneFormer configurations
KEY_DICT = {"ADE20K (150 classes)": "ade20k"}

SWIN_CFG_DICT = {
    "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",
}

SWIN_MODEL_DICT = {
    "ade20k": hf_hub_download(
        repo_id="shi-labs/oneformer_ade20k_swin_large",
        filename="250_16_swin_l_oneformer_ade20k_160k.pth"
    )
}

DINAT_CFG_DICT = {
    "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",
}

DINAT_MODEL_DICT = {
    "ade20k": hf_hub_download(
        repo_id="shi-labs/oneformer_ade20k_dinat_large",
        filename="250_16_dinat_l_oneformer_ade20k_160k.pth"
    )
}

MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT, "Swin-L": SWIN_MODEL_DICT}
CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT, "Swin-L": SWIN_CFG_DICT}
WIDTH_DICT = {"ade20k": 640}

# Contrast detector mapping
CONTRAST_DETECTORS = {
    "Luminance (WCAG)": LuminanceContrastDetector(),
    "Hue": HueContrastDetector(),
    "Saturation": SaturationContrastDetector(),
    "Combined": CombinedContrastDetector()
}

# Device configuration
cpu_device = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Model storage
ONEFORMER_PREDICTORS = {
    "DiNAT-L": {"ADE20K (150 classes)": None},
    "Swin-L": {"ADE20K (150 classes)": None}
}

ONEFORMER_METADATA = {
    "DiNAT-L": {"ADE20K (150 classes)": None},
    "Swin-L": {"ADE20K (150 classes)": None}
}

########################################
# MASK R-CNN SETUP AND FUNCTIONS
########################################

def load_maskrcnn_model(weights_path, device="cuda", threshold=0.5):
    """Load Mask R-CNN model for blackspot detection"""
    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    )
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2  # [Floors, blackspot]
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
    cfg.MODEL.WEIGHTS = weights_path
    # Fix: Convert torch.device to string
    cfg.MODEL.DEVICE = str(device) if isinstance(device, torch.device) else device
    return DetectronPredictor(cfg)

def postprocess_blackspot_masks(im, instances, show_floor=True, show_blackspot=True):
    """Extract floor and blackspot masks from Mask R-CNN predictions"""
    height, width = im.shape[:2]
    pred_classes = instances.pred_classes.cpu().numpy()
    pred_masks = instances.pred_masks.cpu().numpy()

    combined_floor_mask = np.zeros((height, width), dtype=bool)
    final_blackspot = np.zeros((height, width), dtype=bool)

    for cls_id, mask in zip(pred_classes, pred_masks):
        if cls_id == 0 and show_floor:  # Floor class
            combined_floor_mask |= mask
        elif cls_id == 1 and show_blackspot:  # Blackspot class
            final_blackspot |= mask

    return combined_floor_mask.astype(np.uint8), final_blackspot.astype(np.uint8)

########################################
# ONEFORMER SETUP AND FUNCTIONS
########################################

def setup_oneformer_modules():
    """Initialize OneFormer models"""
    for dataset in ["ADE20K (150 classes)"]:
        for backbone in ["DiNAT-L", "Swin-L"]:
            cfg = setup_oneformer_cfg(dataset, backbone)
            metadata = MetadataCatalog.get(
                cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
            )
            ONEFORMER_PREDICTORS[backbone][dataset] = OneFormerPredictor(cfg)
            ONEFORMER_METADATA[backbone][dataset] = metadata

def setup_oneformer_cfg(dataset, backbone):
    """Setup OneFormer configuration"""
    cfg = get_cfg()
    add_deeplab_config(cfg)
    add_common_config(cfg)
    add_swin_config(cfg)
    add_oneformer_config(cfg)
    add_dinat_config(cfg)
    dataset = KEY_DICT[dataset]
    cfg_path = CFG_DICT[backbone][dataset]
    cfg.merge_from_file(cfg_path)
    cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
    cfg.freeze()
    return cfg

def semantic_run(img, predictor, metadata):
    """Run OneFormer semantic segmentation"""
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "semantic")
    out = visualizer.draw_sem_seg(
        predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
    )
    return out, predictions["sem_seg"].argmax(dim=0).to(cpu_device).numpy()

########################################
# INTEGRATED ANALYSIS FUNCTION
########################################

def integrated_analysis(image_path,
                        # Blackspot detection parameters
                        blackspot_threshold, show_floor, show_blackspot,
                        # Contrast detection parameters
                        enable_contrast, backbone, contrast_method, contrast_threshold):
    """
    Perform integrated analysis with both blackspot detection and contrast analysis
    """
    # Read the image
    im = cv2.imread(image_path)
    if im is None:
        return "Error: could not read image!", None, None, None

    # Resize for OneFormer if contrast analysis is enabled
    if enable_contrast:
        width = WIDTH_DICT["ade20k"]
        im_resized = imutils.resize(im, width=width)
    else:
        im_resized = im

    # Part 1: Blackspot Detection with Mask R-CNN
    blackspot_text = []
    blackspot_viz = None

    if show_floor or show_blackspot:
        weights_path = "./output_floor_blackspot/model_0004999.pth"
        maskrcnn_predictor = load_maskrcnn_model(weights_path, device, blackspot_threshold)

        # Run blackspot detection
        outputs = maskrcnn_predictor(im)
        instances = outputs["instances"]

        # Post-process masks
        floor_mask, blackspot_mask = postprocess_blackspot_masks(im, instances, show_floor, show_blackspot)

        # Create visualization
        blackspot_overlay = im.copy()
        overlay = np.zeros_like(im)

        if show_floor:
            overlay[floor_mask > 0] = (0, 255, 0)  # Green for floor
        if show_blackspot:
            overlay[blackspot_mask > 0] = (0, 0, 255)  # Red for blackspot

        blackspot_overlay = cv2.addWeighted(im, 1.0, overlay, 0.5, 0)
        blackspot_viz = Image.fromarray(cv2.cvtColor(blackspot_overlay, cv2.COLOR_BGR2RGB))

        # Calculate statistics
        blackspot_area = int(blackspot_mask.sum())
        floor_area = int(floor_mask.sum())

        blackspot_text.append(f"### Blackspot Detection Results")
        blackspot_text.append(f"**Threshold:** {blackspot_threshold:.2f}")

        if show_floor:
            blackspot_text.append(f"**Floor area:** {floor_area} pixels")
        if show_blackspot:
            blackspot_text.append(f"**Blackspot area:** {blackspot_area} pixels")
        if floor_area > 0 and show_floor:
            percentage = (blackspot_area / floor_area) * 100
            blackspot_text.append(f"**Blackspot coverage:** {percentage:.2f}% of floor area")

    # Part 2: Contrast Analysis with OneFormer
    segmentation_viz = None
    contrast_viz = None
    contrast_text = []

    if enable_contrast:
        dataset = "ADE20K (150 classes)"
        predictor = ONEFORMER_PREDICTORS[backbone][dataset]
        metadata = ONEFORMER_METADATA[backbone][dataset]

        # Get segmentation
        out, seg_mask = semantic_run(im_resized, predictor, metadata)
        segmentation_viz = Image.fromarray(out.get_image())

        # Analyze contrast
        img_rgb = cv2.cvtColor(im_resized, cv2.COLOR_BGR2RGB)
        detector = CONTRAST_DETECTORS[contrast_method]
        contrast_image, problem_areas, stats = detector.analyze(
            img_rgb, seg_mask, contrast_threshold
        )

        contrast_viz = Image.fromarray(contrast_image)

        # Create stats text
        contrast_text.append(f"### Contrast Analysis Results")
        contrast_text.append(f"**Method:** {contrast_method}")
        contrast_text.append(f"**Threshold:** {contrast_threshold:.2f}")
        contrast_text.append(f"**Problem Areas:** {stats['problem_count']}")

        if 'min_contrast' in stats:
            contrast_text.append(f"**Min Contrast:** {stats['min_contrast']:.2f}")
        if 'max_contrast' in stats:
            contrast_text.append(f"**Max Contrast:** {stats['max_contrast']:.2f}")
        if 'average_contrast' in stats:
            contrast_text.append(f"**Average Contrast:** {stats['average_contrast']:.2f}")

    # Combine results
    combined_text = []
    if blackspot_text:
        combined_text.extend(blackspot_text)
    if contrast_text:
        if blackspot_text:
            combined_text.append("\n")
        combined_text.extend(contrast_text)

    return "\n".join(combined_text), blackspot_viz, segmentation_viz, contrast_viz

########################################
# GRADIO INTERFACE
########################################

# Initialize models
print("Initializing OneFormer models...")
setup_oneformer_modules()

title = "NeuroNest: Integrated Blackspot & Contrast Detection"
description = """
This integrated system combines:
1. **Blackspot Detection**: Uses Mask R-CNN to detect blackspots on floors
2. **Contrast Analysis**: Uses OneFormer segmentation to analyze contrast between objects

Both analyses help identify potential accessibility issues for individuals with Alzheimer's disease.
"""

# Create the Gradio interface
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=1):
            # Input image
            image_input = gr.Image(label="Input Image", type="filepath")

            # Blackspot detection controls
            with gr.Accordion("Blackspot Detection Settings", open=True):
                blackspot_threshold = gr.Slider(
                    minimum=0.1, maximum=0.9, value=0.5, step=0.05,
                    label="Blackspot Detection Threshold"
                )
                with gr.Row():
                    show_floor = gr.Checkbox(value=True, label="Show Floor")
                    show_blackspot = gr.Checkbox(value=True, label="Show Blackspots")

            # Contrast analysis controls
            with gr.Accordion("Contrast Analysis Settings", open=True):
                enable_contrast = gr.Checkbox(value=True, label="Enable Contrast Analysis")
                backbone = gr.Radio(
                    choices=["Swin-L", "DiNAT-L"],
                    value="Swin-L",
                    label="OneFormer Backbone"
                )
                contrast_method = gr.Radio(
                    choices=["Luminance (WCAG)", "Hue", "Saturation", "Combined"],
                    value="Luminance (WCAG)",
                    label="Contrast Detection Method"
                )
                contrast_threshold = gr.Slider(
                    minimum=1.0, maximum=10.0, value=4.5, step=0.1,
                    label="Contrast Threshold"
                )

            analyze_btn = gr.Button("Analyze", variant="primary")

        with gr.Column(scale=2):
            # Output displays
            with gr.Tabs():
                with gr.Tab("Analysis Report"):
                    analysis_text = gr.Textbox(label="Analysis Results", lines=10)

                with gr.Tab("Blackspot Detection"):
                    blackspot_output = gr.Image(label="Blackspot Visualization")

                with gr.Tab("Segmentation"):
                    segmentation_output = gr.Image(label="OneFormer Segmentation")

                with gr.Tab("Contrast Analysis"):
                    contrast_output = gr.Image(label="Contrast Visualization")

    # Connect the interface
    analyze_btn.click(
        fn=integrated_analysis,
        inputs=[
            image_input,
            blackspot_threshold, show_floor, show_blackspot,
            enable_contrast, backbone, contrast_method, contrast_threshold
        ],
        outputs=[
            analysis_text, blackspot_output, segmentation_output, contrast_output
        ]
    )

    # Examples
    gr.Examples(
        examples=[
            ["examples/indoor_room.jpg", 0.5, True, True, True, "Swin-L", "Luminance (WCAG)", 4.5],
            ["examples/living_room.jpg", 0.7, True, True, True, "DiNAT-L", "Combined", 3.0],
        ],
        inputs=[
            image_input,
            blackspot_threshold, show_floor, show_blackspot,
            enable_contrast, backbone, contrast_method, contrast_threshold
        ]
    )

if __name__ == "__main__":
    print(f"Launching integrated NeuroNest app on device: {device}")
    demo.queue().launch(server_name="0.0.0.0", share=True)
gradio_contrast.py
DELETED
@@ -1,455 +0,0 @@
import torch
import os
import sys
import time
import numpy as np
from PIL import Image
import cv2
import imutils
import colorsys
from scipy import ndimage

# Set CUDA device explicitly at the start
if torch.cuda.is_available():
    torch.cuda.set_device(0)  # Use first GPU
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("WARNING: No GPU available, using CPU")

print("Installed the dependencies!")

from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import MetadataCatalog

from oneformer import (
    add_oneformer_config,
    add_common_config,
    add_swin_config,
    add_dinat_config,
)

from demo.defaults import DefaultPredictor
from demo.visualizer import Visualizer, ColorMode

import gradio as gr
from huggingface_hub import hf_hub_download

# Force unbuffered output for SLURM logs
sys.stdout = sys.__stdout__
sys.stderr = sys.__stderr__

# Set environment variables for better GPU performance
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['TORCH_USE_CUDA_DSA'] = '1'

# Contrast Detection Classes
class ContrastDetector:
    """Base class for contrast detection between segments"""

    @staticmethod
    def calculate_luminance_contrast(color1, color2):
        """Calculate WCAG luminance contrast ratio"""
        def get_relative_luminance(rgb):
            r, g, b = [val/255.0 for val in rgb]
            r = r/12.92 if r <= 0.03928 else ((r + 0.055)/1.055) ** 2.4
            g = g/12.92 if g <= 0.03928 else ((g + 0.055)/1.055) ** 2.4
            b = b/12.92 if b <= 0.03928 else ((b + 0.055)/1.055) ** 2.4
            return 0.2126 * r + 0.7152 * g + 0.0722 * b

        lum1 = get_relative_luminance(color1)
        lum2 = get_relative_luminance(color2)

        lighter = max(lum1, lum2)
        darker = min(lum1, lum2)

        return (lighter + 0.05) / (darker + 0.05)

    @staticmethod
    def calculate_hue_contrast(color1, color2):
        """Calculate hue difference between two colors"""
        hsv1 = colorsys.rgb_to_hsv(color1[0]/255.0, color1[1]/255.0, color1[2]/255.0)
        hsv2 = colorsys.rgb_to_hsv(color2[0]/255.0, color2[1]/255.0, color2[2]/255.0)

        hue_diff = abs(hsv1[0] - hsv2[0])
        if hue_diff > 0.5:
            hue_diff = 1 - hue_diff

        return hue_diff * 2

    @staticmethod
    def calculate_saturation_contrast(color1, color2):
        """Calculate saturation difference between two colors"""
        hsv1 = colorsys.rgb_to_hsv(color1[0]/255.0, color1[1]/255.0, color1[2]/255.0)
        hsv2 = colorsys.rgb_to_hsv(color2[0]/255.0, color2[1]/255.0, color2[2]/255.0)

        return abs(hsv1[1] - hsv2[1])

    @staticmethod
    def analyze_contrast(image, segmentation, method="luminance", threshold=4.5):
        """Analyze contrast between adjacent segments"""
        unique_segments = np.unique(segmentation)
        h, w = segmentation.shape
        contrast_mask = np.zeros((h, w), dtype=bool)
        problem_areas = []

        # Calculate average colors for each segment
        segment_colors = {}
        for seg_id in unique_segments:
            mask = segmentation == seg_id
            if np.any(mask):
                segment_colors[seg_id] = np.mean(image[mask], axis=0).astype(int)

        # Check contrast between adjacent segments
        for i in range(h):
            for j in range(w):
                current_seg = segmentation[i, j]

                # Check 4-connected neighbors
                for di, dj in [(0, 1), (1, 0), (0, -1), (-1, 0)]:
                    ni, nj = i + di, j + dj
                    if 0 <= ni < h and 0 <= nj < w:
                        neighbor_seg = segmentation[ni, nj]

                        if current_seg != neighbor_seg:
                            color1 = segment_colors[current_seg]
                            color2 = segment_colors[neighbor_seg]

                            if method == "luminance":
                                contrast = ContrastDetector.calculate_luminance_contrast(color1, color2)
                            elif method == "hue":
                                contrast = ContrastDetector.calculate_hue_contrast(color1, color2)
                                threshold = 0.3  # Adjust threshold for hue
                            elif method == "saturation":
                                contrast = ContrastDetector.calculate_saturation_contrast(color1, color2)
                                threshold = 0.3  # Adjust threshold for saturation

                            if contrast < threshold:
                                contrast_mask[i, j] = True
                                problem_areas.append((current_seg, neighbor_seg, contrast))

        return contrast_mask, problem_areas, segment_colors

# Rest of your code remains the same until setup_cfg function
KEY_DICT = {"Cityscapes (19 classes)": "cityscapes",
            "COCO (133 classes)": "coco",
            "ADE20K (150 classes)": "ade20k",}

SWIN_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_swin_large_IN21k_384_bs16_90k.yaml",
                 "coco": "configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml",
                 "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",}

SWIN_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_swin_large",
                                                 filename="250_16_swin_l_oneformer_cityscapes_90k.pth"),
                   "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_swin_large",
                                           filename="150_16_swin_l_oneformer_coco_100ep.pth"),
                   "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_swin_large",
                                             filename="250_16_swin_l_oneformer_ade20k_160k.pth")
                   }

DINAT_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_dinat_large_bs16_90k.yaml",
                  "coco": "configs/coco/oneformer_dinat_large_bs16_100ep.yaml",
                  "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",}

DINAT_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_dinat_large",
                                                  filename="250_16_dinat_l_oneformer_cityscapes_90k.pth"),
                    "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_dinat_large",
                                            filename="150_16_dinat_l_oneformer_coco_100ep.pth"),
                    "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_dinat_large",
                                              filename="250_16_dinat_l_oneformer_ade20k_160k.pth")
                    }

MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT,
              "Swin-L": SWIN_MODEL_DICT}

CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT,
            "Swin-L": SWIN_CFG_DICT}

WIDTH_DICT = {"cityscapes": 512,
              "coco": 512,
              "ade20k": 640}

# Modified to ensure CUDA device
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print(f"Using device: {device}")
else:
    device = torch.device("cpu")
    print(f"WARNING: Using CPU device")

cpu_device = torch.device("cpu")

PREDICTORS = {
    "DiNAT-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    },
    "Swin-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    }
}

METADATA = {
    "DiNAT-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    },
    "Swin-L": {
        "Cityscapes (19 classes)": None,
        "COCO (133 classes)": None,
        "ADE20K (150 classes)": None
    }
}

def setup_modules():
    print("Setting up modules...")
    for dataset in ["Cityscapes (19 classes)", "COCO (133 classes)", "ADE20K (150 classes)"]:
        for backbone in ["DiNAT-L", "Swin-L"]:
            print(f"Loading {backbone} for {dataset}...")
            cfg = setup_cfg(dataset, backbone)
            metadata = MetadataCatalog.get(
                cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
            )
            if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST_PANOPTIC[0]:
                from cityscapesscripts.helpers.labels import labels
                stuff_colors = [k.color for k in labels if k.trainId != 255]
                metadata = metadata.set(stuff_colors=stuff_colors)

            # Create predictor with explicit device
            predictor = DefaultPredictor(cfg)
            predictor.model.to(device)

            PREDICTORS[backbone][dataset] = predictor
            METADATA[backbone][dataset] = metadata
            print(f"✓ Loaded {backbone} for {dataset}")
    print("All modules setup complete!")

def setup_cfg(dataset, backbone):
    # load config from file and command-line arguments
    cfg = get_cfg()
    add_deeplab_config(cfg)
    add_common_config(cfg)
    add_swin_config(cfg)
    add_oneformer_config(cfg)
    add_dinat_config(cfg)
    dataset = KEY_DICT[dataset]
    cfg_path = CFG_DICT[backbone][dataset]
    cfg.merge_from_file(cfg_path)

    # Explicitly set device to CUDA if available
    if torch.cuda.is_available():
        cfg.MODEL.DEVICE = 'cuda:0'
        print(f"Config set to use CUDA device")
    else:
        cfg.MODEL.DEVICE = 'cpu'
        print(f"Config set to use CPU device")

    cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
    cfg.freeze()
    return cfg

# Rest of your functions remain the same
def panoptic_run(img, predictor, metadata):
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "panoptic")
    panoptic_seg, segments_info = predictions["panoptic_seg"]
    out = visualizer.draw_panoptic_seg_predictions(
        panoptic_seg.to(cpu_device), segments_info, alpha=0.5
    )
    visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
    out_map = visualizer_map.draw_panoptic_seg_predictions(
        panoptic_seg.to(cpu_device), segments_info, alpha=1, is_text=False
    )
    return out, out_map, predictions

def instance_run(img, predictor, metadata):
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "instance")
    instances = predictions["instances"].to(cpu_device)
    out = visualizer.draw_instance_predictions(predictions=instances, alpha=0.5)
    visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
    out_map = visualizer_map.draw_instance_predictions(predictions=instances, alpha=1, is_text=False)
    return out, out_map, predictions

def semantic_run(img, predictor, metadata):
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "semantic")
    out = visualizer.draw_sem_seg(
        predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
    )
    visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
    out_map = visualizer_map.draw_sem_seg(
        predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=1, is_text=False
    )
    return out, out_map, predictions

TASK_INFER = {"the task is panoptic": panoptic_run, "the task is instance": instance_run, "the task is semantic": semantic_run}

def create_contrast_visualization(img, contrast_mask, problem_areas, segment_colors):
    """Create visualization of contrast issues"""
    # Copy original image
    contrast_viz = img.copy()

    # Highlight low contrast boundaries
    boundary_color = (255, 0, 0)  # Red for problem areas
    contrast_viz[contrast_mask] = boundary_color

    # Add information overlay
    info_text = f"Low contrast areas detected: {len(problem_areas)}"
    cv2.putText(contrast_viz, info_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    return contrast_viz

def segment_and_analyze(path, task, dataset, backbone, enable_contrast, contrast_method, contrast_threshold):
    # Get predictions and segmentation visualization
    predictor = PREDICTORS[backbone][dataset]
    metadata = METADATA[backbone][dataset]
    img = cv2.imread(path)
    width = WIDTH_DICT[KEY_DICT[dataset]]
    img = imutils.resize(img, width=width)

    out, out_map, predictions = TASK_INFER[task](img, predictor, metadata)
    out_img = Image.fromarray(out.get_image())
    out_map_img = Image.fromarray(out_map.get_image())

    if not enable_contrast:
        return out_img, out_map_img, None, None

    # Extract segmentation mask from predictions
    if task == "the task is semantic":
        seg_mask = predictions["sem_seg"].argmax(dim=0).cpu().numpy()
    elif task == "the task is panoptic":
        seg_mask, _ = predictions["panoptic_seg"]
        seg_mask = seg_mask.cpu().numpy()
    elif task == "the task is instance":
        # For instance segmentation, create a mask from instances
        instances = predictions["instances"].to("cpu")
        seg_mask = np.zeros(img.shape[:2], dtype=np.int32)
        for i, mask in enumerate(instances.pred_masks):
            seg_mask[mask] = i + 1

    # Analyze contrast
    contrast_mask, problem_areas, segment_colors = ContrastDetector.analyze_contrast(
        img, seg_mask, method=contrast_method, threshold=contrast_threshold
    )

    # Create contrast visualization
    contrast_viz = create_contrast_visualization(img, contrast_mask, problem_areas, segment_colors)
    contrast_viz_img = Image.fromarray(contrast_viz[:, :, ::-1])  # Convert BGR to RGB

    # Generate analysis report
    report = f"### Contrast Analysis Report\n\n"
    report += f"**Method:** {contrast_method.capitalize()}\n"
    report += f"**Threshold:** {contrast_threshold}\n"
    report += f"**Total segments:** {len(segment_colors)}\n"
    report += f"**Low contrast boundaries found:** {len(problem_areas)}\n\n"

    if problem_areas:
        report += "**Problem Areas:**\n"
        for i, (seg1, seg2, contrast_value) in enumerate(problem_areas[:10]):  # Show first 10
            report += f"- Segments {seg1} and {seg2}: Contrast ratio = {contrast_value:.2f}\n"
        if len(problem_areas) > 10:
            report += f"... and {len(problem_areas) - 10} more\n"

    return out_img, out_map_img, contrast_viz_img, report

title = "<h1 style='text-align: center'>OneFormer:DIEGO MENTORIA MILIONÁRIA - APP 1</h1>"
description = "<p style='font-size: 14px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://github.com/lolout1/sam2Contrast' style='text-decoration:none' target='_blank'>NeuroNest Contrast Model</a></p>" \
            + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/oneformer/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/2211.06220' target='_blank'>ArXiv Paper</a> | <a href='https://github.com/SHI-Labs/OneFormer' target='_blank'>Github Repo</a></p>" \
            + "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'> \
            This model leverages the OneFormer architecture to perform comprehensive image segmentation and labeling across multiple tasks. The system can identify and segment various objects, structures, and regions within images with high accuracy. It supports semantic, instance, and panoptic segmentation modes, enabling detailed analysis of indoor and outdoor environments. The model excels at distinguishing between different classes of objects, from common everyday items to complex urban structures, making it particularly useful for environmental analysis and scene understanding applications.\
            </p>" \
            + "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: w300;'> [Note: Inference on CPU may take upto 2 minutes. On a single RTX A6000 GPU, OneFormer is able to inference at more than 15 FPS.]</p>"

# Main execution with error handling
if __name__ == "__main__":
    try:
        print("Starting setup...")
        setup_modules()

        print("Creating Gradio interface...")
        with gr.Blocks(title="OneFormer with Contrast Detection") as iface:
            gr.Markdown(title)
            gr.Markdown(description)

            with gr.Row():
                with gr.Column(scale=1):
                    input_image = gr.Image(label="Input Image", type="filepath")
                    task = gr.Radio(
                        choices=["the task is panoptic", "the task is instance", "the task is semantic"],
                        value="the task is panoptic",
                        label="Task Token Input"
                    )
                    dataset = gr.Radio(
                        choices=["COCO (133 classes)", "Cityscapes (19 classes)", "ADE20K (150 classes)"],
                        value="COCO (133 classes)",
                        label="Model"
                    )
                    backbone = gr.Radio(
                        choices=["DiNAT-L", "Swin-L"],
                        value="DiNAT-L",
                        label="Backbone"
                    )

                    with gr.Accordion("Contrast Detection Options", open=False):
                        enable_contrast = gr.Checkbox(
                            label="Enable Contrast Detection",
                            value=False
                        )
                        contrast_method = gr.Radio(
                            choices=["luminance", "hue", "saturation"],
                            value="luminance",
                            label="Contrast Method"
                        )
                        contrast_threshold = gr.Slider(
                            minimum=1.0,
                            maximum=10.0,
                            value=4.5,
                            step=0.1,
                            label="Contrast Threshold (WCAG AA is 4.5)"
                        )

                    submit_btn = gr.Button("Analyze", variant="primary")

                with gr.Column(scale=2):
                    with gr.Tabs():
                        with gr.TabItem("Segmentation Results"):
                            seg_output = gr.Image(type="pil", label="Segmentation Overlay")
                            seg_map = gr.Image(type="pil", label="Segmentation Map")

                        with gr.TabItem("Contrast Analysis"):
                            contrast_viz = gr.Image(type="pil", label="Contrast Visualization")
                            contrast_report = gr.Markdown(label="Contrast Analysis Report")

            examples = [
                ["examples/coco.jpeg", "the task is panoptic", "COCO (133 classes)", "DiNAT-L", False, "luminance", 4.5],
                ["examples/cityscapes.png", "the task is panoptic", "Cityscapes (19 classes)", "DiNAT-L", False, "luminance", 4.5],
                ["examples/ade20k.jpeg", "the task is panoptic", "ADE20K (150 classes)", "DiNAT-L", False, "luminance", 4.5]
            ]

            gr.Examples(
                examples=examples,
                inputs=[input_image, task, dataset, backbone, enable_contrast, contrast_method, contrast_threshold],
                outputs=[seg_output, seg_map, contrast_viz, contrast_report],
                fn=segment_and_analyze,
                cache_examples=False
            )

            submit_btn.click(
                fn=segment_and_analyze,
                inputs=[input_image, task, dataset, backbone, enable_contrast, contrast_method, contrast_threshold],
                outputs=[seg_output, seg_map, contrast_viz, contrast_report]
            )

        print("Launching Gradio app...")
        iface.launch(server_name="0.0.0.0", server_port=7860, share=True)

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
gradio_gpu.py
DELETED
@@ -1,234 +0,0 @@
import torch
import numpy as np
from PIL import Image
import cv2
import imutils
import os
import sys
import time
from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import MetadataCatalog
from scipy import ndimage
import colorsys
import math

torch.set_num_threads(16)
torch.set_num_interop_threads(16)

from oneformer import (
    add_oneformer_config,
    add_common_config,
    add_swin_config,
    add_dinat_config,
)

from demo.defaults import DefaultPredictor
from demo.visualizer import Visualizer, ColorMode

import gradio as gr
from huggingface_hub import hf_hub_download

# NeuroNest specific imports
from utils.contrast_detector import ContrastDetector
from utils.luminance_contrast import LuminanceContrastDetector
from utils.hue_contrast import HueContrastDetector
from utils.saturation_contrast import SaturationContrastDetector
from utils.combined_contrast import CombinedContrastDetector

KEY_DICT = {
    "ADE20K (150 classes)": "ade20k",
}

SWIN_CFG_DICT = {
    "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",
}

SWIN_MODEL_DICT = {
    "ade20k": hf_hub_download(
        repo_id="shi-labs/oneformer_ade20k_swin_large",
        filename="250_16_swin_l_oneformer_ade20k_160k.pth"
    )
}

DINAT_CFG_DICT = {
    "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",
}

DINAT_MODEL_DICT = {
    "ade20k": hf_hub_download(
        repo_id="shi-labs/oneformer_ade20k_dinat_large",
        filename="250_16_dinat_l_oneformer_ade20k_160k.pth"
    )
}

MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT, "Swin-L": SWIN_MODEL_DICT}
CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT, "Swin-L": SWIN_CFG_DICT}
WIDTH_DICT = {"ade20k": 640}

cpu_device = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

PREDICTORS = {
    "DiNAT-L": {"ADE20K (150 classes)": None},
    "Swin-L": {"ADE20K (150 classes)": None}
}

METADATA = {
    "DiNAT-L": {"ADE20K (150 classes)": None},
    "Swin-L": {"ADE20K (150 classes)": None}
}

# Contrast detector mapping
CONTRAST_DETECTORS = {
    "Luminance (WCAG)": LuminanceContrastDetector(),
    "Hue": HueContrastDetector(),
    "Saturation": SaturationContrastDetector(),
    "Combined": CombinedContrastDetector()
}

def setup_modules():
    for dataset in ["ADE20K (150 classes)"]:
        for backbone in ["DiNAT-L", "Swin-L"]:
            cfg = setup_cfg(dataset, backbone)
            metadata = MetadataCatalog.get(
                cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
            )
            PREDICTORS[backbone][dataset] = DefaultPredictor(cfg)
            METADATA[backbone][dataset] = metadata

def setup_cfg(dataset, backbone):
    cfg = get_cfg()
    add_deeplab_config(cfg)
    add_common_config(cfg)
    add_swin_config(cfg)
    add_oneformer_config(cfg)
    add_dinat_config(cfg)
    dataset = KEY_DICT[dataset]
    cfg_path = CFG_DICT[backbone][dataset]
    cfg.merge_from_file(cfg_path)
    cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
    cfg.freeze()
    return cfg

def semantic_run(img, predictor, metadata):
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
    predictions = predictor(img, "semantic")
    out = visualizer.draw_sem_seg(
        predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
    )
    return out, predictions["sem_seg"].argmax(dim=0).to(cpu_device).numpy()

def analyze_contrast(image, segmentation, contrast_method, threshold):
    """Analyze contrast between segments using selected method"""
    detector = CONTRAST_DETECTORS[contrast_method]

    # Perform contrast analysis
    contrast_image, problem_areas, stats = detector.analyze(
        image, segmentation, threshold
    )

    return contrast_image, problem_areas, stats

def segment_and_analyze_contrast(path, backbone, contrast_method, threshold):
    """Main function to segment and analyze contrast"""
    dataset = "ADE20K (150 classes)"
    predictor = PREDICTORS[backbone][dataset]
    metadata = METADATA[backbone][dataset]

    # Read and resize image
    img = cv2.imread(path)
    if img is None:
        return None, None, "Error: Could not load image"

    width = WIDTH_DICT[KEY_DICT[dataset]]
    img = imutils.resize(img, width=width)

    # Get segmentation
    out, seg_mask = semantic_run(img, predictor, metadata)
    out_img = Image.fromarray(out.get_image())

    # Analyze contrast
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    contrast_img, problem_areas, stats = analyze_contrast(
        img_rgb, seg_mask, contrast_method, threshold
    )

    # Create stats text
    stats_text = f"### Contrast Analysis Results\n\n"
    stats_text += f"**Method:** {contrast_method}\n"
    stats_text += f"**Threshold:** {threshold:.2f}\n"
    stats_text += f"**Problem Areas:** {stats['problem_count']}\n"

    if 'min_contrast' in stats:
        stats_text += f"**Min Contrast:** {stats['min_contrast']:.2f}\n"
    if 'max_contrast' in stats:
        stats_text += f"**Max Contrast:** {stats['max_contrast']:.2f}\n"
    if 'average_contrast' in stats:
        stats_text += f"**Average Contrast:** {stats['average_contrast']:.2f}\n"

    # Convert contrast image to PIL
    contrast_pil = Image.fromarray(contrast_img)

    return out_img, contrast_pil, stats_text

# Initialize models
setup_modules()

# Gradio Interface
title = "<h1 style='text-align: center'>NeuroNest: Abheek Pradhan - Contrast Model</h1>"
description = "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> "\
              "<a href='https://github.com/lolout1/sam2Contrast' target='_blank'>Github Repo</a></p>" \
              "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'>" \
              "I am developing NeuroNest, a contrast detection system designed to identify areas with insufficient contrast " \
              "for individuals with Alzheimer's disease. This tool leverages OneFormer's state-of-the-art segmentation " \
              "capabilities trained on ADE20K dataset to detect indoor objects like floors, furniture, walls, and ceilings. " \
              "By analyzing contrast ratios between adjacent segments, NeuroNest flags potential visual accessibility issues " \
              "that may trigger confusion or disorientation in elderly individuals with cognitive impairments.</p>" \
              "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: w300;'>" \
              "[Note: When running on my Linux cluster, please request a GPU node for optimal performance. " \
              "On login nodes, CUDA may not be available.]</p>"

gradio_inputs = [
    gr.Image(label="Input Image", type="filepath"),
    gr.Radio(choices=["Swin-L", "DiNAT-L"], value="Swin-L", label="Backbone"),
    gr.Radio(
        choices=["Luminance (WCAG)", "Hue", "Saturation", "Combined"],
        value="Luminance (WCAG)",
        label="Contrast Detection Method"
    ),
    gr.Slider(
        minimum=1.0,
        maximum=10.0,
        value=4.5,
        step=0.1,
        label="Contrast Threshold (Lower = More Strict)"
    )
]

gradio_outputs = [
    gr.Image(type="pil", label="Segmentation Result"),
    gr.Image(type="pil", label="Contrast Analysis"),
    gr.Markdown(label="Analysis Results")
]

examples = [
    ["examples/indoor_room.jpg", "Swin-L", "Luminance (WCAG)", 4.5],
    ["examples/living_room.jpg", "DiNAT-L", "Combined", 3.0],
]

iface = gr.Interface(
    fn=segment_and_analyze_contrast,
    inputs=gradio_inputs,
    outputs=gradio_outputs,
    examples_per_page=5,
    allow_flagging="never",
    examples=examples,
    title=title,
    description=description
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", share=True)