lolout1 committed
Commit 88d5ae6 · 1 Parent(s): 513c528

cleaning repo

Files changed (5)
  1. .gitignore +0 -1
  2. gradio_app.py +0 -222
  3. gradio_combine.py +0 -384
  4. gradio_contrast.py +0 -455
  5. gradio_gpu.py +0 -234
.gitignore CHANGED
@@ -9,7 +9,6 @@ build/
 dist/
 *.egg-info/
 .env
-.claude/
 oneformer/modeling/pixel_decoder/ops/build/
 oneformer/modeling/pixel_decoder/ops/dist/
 oneformer/modeling/pixel_decoder/ops/*.egg-info/
gradio_app.py DELETED
@@ -1,222 +0,0 @@
1
- import torch
2
-
3
- print("Installed the dependencies!")
4
-
5
- import numpy as np
6
- from PIL import Image
7
- import cv2
8
- import imutils
9
- import os
10
- import sys
11
- import time
12
- from detectron2.config import get_cfg
13
- from detectron2.projects.deeplab import add_deeplab_config
14
- from detectron2.data import MetadataCatalog
15
- torch.set_num_threads(16) # Use 16 CPU threads
16
- torch.set_num_interop_threads(16) # Inter-op parallelism
17
- from oneformer import (
18
- add_oneformer_config,
19
- add_common_config,
20
- add_swin_config,
21
- add_dinat_config,
22
- )
23
-
24
- from demo.defaults import DefaultPredictor
25
- from demo.visualizer import Visualizer, ColorMode
26
-
27
- import gradio as gr
28
- from huggingface_hub import hf_hub_download
29
-
30
- KEY_DICT = {"Cityscapes (19 classes)": "cityscapes",
31
- "COCO (133 classes)": "coco",
32
- "ADE20K (150 classes)": "ade20k",}
33
-
34
- SWIN_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_swin_large_IN21k_384_bs16_90k.yaml",
35
- "coco": "configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml",
36
- "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",}
37
-
38
- SWIN_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_swin_large",
39
- filename="250_16_swin_l_oneformer_cityscapes_90k.pth"),
40
- "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_swin_large",
41
- filename="150_16_swin_l_oneformer_coco_100ep.pth"),
42
- "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_swin_large",
43
- filename="250_16_swin_l_oneformer_ade20k_160k.pth")
44
- }
45
-
46
- DINAT_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_dinat_large_bs16_90k.yaml",
47
- "coco": "configs/coco/oneformer_dinat_large_bs16_100ep.yaml",
48
- "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",}
49
-
50
- DINAT_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_dinat_large",
51
- filename="250_16_dinat_l_oneformer_cityscapes_90k.pth"),
52
- "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_dinat_large",
53
- filename="150_16_dinat_l_oneformer_coco_100ep.pth"),
54
- "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_dinat_large",
55
- filename="250_16_dinat_l_oneformer_ade20k_160k.pth")
56
- }
57
-
58
- MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT,
59
- "Swin-L": SWIN_MODEL_DICT }
60
-
61
- CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT,
62
- "Swin-L": SWIN_CFG_DICT }
63
-
64
- WIDTH_DICT = {"cityscapes": 512,
65
- "coco": 512,
66
- "ade20k": 640}
67
-
68
- cpu_device = torch.device("cpu")
69
-
70
- PREDICTORS = {
71
- "DiNAT-L": {
72
- "Cityscapes (19 classes)": None,
73
- "COCO (133 classes)": None,
74
- "ADE20K (150 classes)": None
75
- },
76
- "Swin-L": {
77
- "Cityscapes (19 classes)": None,
78
- "COCO (133 classes)": None,
79
- "ADE20K (150 classes)": None
80
- }
81
- }
82
-
83
- METADATA = {
84
- "DiNAT-L": {
85
- "Cityscapes (19 classes)": None,
86
- "COCO (133 classes)": None,
87
- "ADE20K (150 classes)": None
88
- },
89
- "Swin-L": {
90
- "Cityscapes (19 classes)": None,
91
- "COCO (133 classes)": None,
92
- "ADE20K (150 classes)": None
93
- }
94
- }
95
-
96
- def setup_modules():
97
- for dataset in ["Cityscapes (19 classes)", "COCO (133 classes)", "ADE20K (150 classes)"]:
98
- for backbone in ["DiNAT-L", "Swin-L"]:
99
- cfg = setup_cfg(dataset, backbone)
100
- metadata = MetadataCatalog.get(
101
- cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
102
- )
103
- if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST_PANOPTIC[0]:
104
- from cityscapesscripts.helpers.labels import labels
105
- stuff_colors = [k.color for k in labels if k.trainId != 255]
106
- metadata = metadata.set(stuff_colors=stuff_colors)
107
- PREDICTORS[backbone][dataset] = DefaultPredictor(cfg)
108
- METADATA[backbone][dataset] = metadata
109
-
110
- def setup_cfg(dataset, backbone):
111
- # load config from file and command-line arguments
112
- cfg = get_cfg()
113
- add_deeplab_config(cfg)
114
- add_common_config(cfg)
115
- add_swin_config(cfg)
116
- add_oneformer_config(cfg)
117
- add_dinat_config(cfg)
118
- dataset = KEY_DICT[dataset]
119
- cfg_path = CFG_DICT[backbone][dataset]
120
- cfg.merge_from_file(cfg_path)
121
- if torch.cuda.is_available():
122
- cfg.MODEL.DEVICE = 'cuda'
123
- else:
124
- cfg.MODEL.DEVICE = 'cpu'
125
- cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
126
- cfg.freeze()
127
- return cfg
128
-
129
- # def setup_modules(dataset, backbone):
130
- # cfg = setup_cfg(dataset, backbone)
131
- # predictor = DefaultPredictor(cfg)
132
- # # predictor = PREDICTORS[backbone][dataset]
133
- # metadata = MetadataCatalog.get(
134
- # cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
135
- # )
136
- # if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST_PANOPTIC[0]:
137
- # from cityscapesscripts.helpers.labels import labels
138
- # stuff_colors = [k.color for k in labels if k.trainId != 255]
139
- # metadata = metadata.set(stuff_colors=stuff_colors)
140
-
141
- # return predictor, metadata
142
-
143
- def panoptic_run(img, predictor, metadata):
144
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
145
- predictions = predictor(img, "panoptic")
146
- panoptic_seg, segments_info = predictions["panoptic_seg"]
147
- out = visualizer.draw_panoptic_seg_predictions(
148
- panoptic_seg.to(cpu_device), segments_info, alpha=0.5
149
- )
150
- visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
151
- out_map = visualizer_map.draw_panoptic_seg_predictions(
152
- panoptic_seg.to(cpu_device), segments_info, alpha=1, is_text=False
153
- )
154
- return out, out_map
155
-
156
- def instance_run(img, predictor, metadata):
157
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
158
- predictions = predictor(img, "instance")
159
- instances = predictions["instances"].to(cpu_device)
160
- out = visualizer.draw_instance_predictions(predictions=instances, alpha=0.5)
161
- visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
162
- out_map = visualizer_map.draw_instance_predictions(predictions=instances, alpha=1, is_text=False)
163
- return out, out_map
164
-
165
- def semantic_run(img, predictor, metadata):
166
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
167
- predictions = predictor(img, "semantic")
168
- out = visualizer.draw_sem_seg(
169
- predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
170
- )
171
- visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
172
- out_map = visualizer_map.draw_sem_seg(
173
- predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=1, is_text=False
174
- )
175
- return out, out_map
176
-
177
- TASK_INFER = {"the task is panoptic": panoptic_run, "the task is instance": instance_run, "the task is semantic": semantic_run}
178
-
179
- def segment(path, task, dataset, backbone):
180
- # predictor, metadata = setup_modules(dataset, backbone)
181
- predictor = PREDICTORS[backbone][dataset]
182
- metadata = METADATA[backbone][dataset]
183
- img = cv2.imread(path)
184
- width = WIDTH_DICT[KEY_DICT[dataset]]
185
- img = imutils.resize(img, width=width)
186
- out, out_map = TASK_INFER[task](img, predictor, metadata)
187
- out = Image.fromarray(out.get_image())
188
- out_map = Image.fromarray(out_map.get_image())
189
- return out, out_map
190
-
191
- title = "<h1 style='text-align: center'>OneFormer:DIEGO MENTORIA MILIONÁRIA - APP 1</h1>"
192
- # style='margin-bottom: -10px;
193
- description = "<p style='font-size: 14px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://praeclarumjj3.github.io/' style='text-decoration:none' target='_blank'>Jitesh Jain, </a> <a href='https://chrisjuniorli.github.io/' style='text-decoration:none' target='_blank'>Jiachen Li<sup>*</sup>, </a> <a href='https://www.linkedin.com/in/mtchiu/' style='text-decoration:none' target='_blank'>MangTik Chiu<sup>*</sup>, </a> <a href='https://alihassanijr.com/' style='text-decoration:none' target='_blank'>Ali Hassani, </a> <a href='https://www.linkedin.com/in/nukich74/' style='text-decoration:none' target='_blank'>Nikita Orlov, </a> <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Humphrey Shi</a></p>" \
194
- + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/oneformer/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/2211.06220' target='_blank'>ArXiv Paper</a> | <a href='https://github.com/SHI-Labs/OneFormer' target='_blank'>Github Repo</a></p>" \
195
- + "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'> \
196
- OneFormer is the first multi-task universal image segmentation framework based on transformers. Our single OneFormer model achieves state-of-the-art performance across all three segmentation tasks with a single task-conditioned joint training process. OneFormer uses a task token to condition the model on the task in focus, making our architecture task-guided for training, and task-dynamic for inference, all with a single model. We believe OneFormer is a significant step towards making image segmentation more universal and accessible.\
197
- </p>" \
198
- + "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: w300;'> [Note: Inference on CPU may take upto 2 minutes. On a single RTX A6000 GPU, OneFormer is able to inference at more than 15 FPS.]</p>"
199
-
200
- setup_modules()
201
-
202
- gradio_inputs = [gr.Image(label="Input Image",type="filepath"),
203
- gr.Radio(choices=["the task is panoptic" ,"the task is instance", "the task is semantic"], type="value", value="the task is panoptic", label="Task Token Input"),
204
- gr.Radio(choices=["COCO (133 classes)" ,"Cityscapes (19 classes)", "ADE20K (150 classes)"], type="value", value="COCO (133 classes)", label="Model"),
205
- gr.Radio(choices=["DiNAT-L" ,"Swin-L"], type="value", value="DiNAT-L", label="Backbone"),
206
- ]
207
- gradio_outputs = [gr.Image(type="pil", label="Segmentation Overlay"), gr.Image(type="pil", label="Segmentation Map")]
208
-
209
-
210
- examples = [["examples/coco.jpeg", "the task is panoptic", "COCO (133 classes)", "DiNAT-L"],
211
- ["examples/cityscapes.png", "the task is panoptic", "Cityscapes (19 classes)", "DiNAT-L"],
212
- ["examples/ade20k.jpeg", "the task is panoptic", "ADE20K (150 classes)", "DiNAT-L"]]
213
-
214
-
215
- iface = gr.Interface(fn=segment, inputs=gradio_inputs,
216
- outputs=gradio_outputs,
217
- examples_per_page=5,
218
- allow_flagging="never",
219
- examples=examples, title=title,
220
- description=description)
221
-
222
- iface.launch(server_name="0.0.0.0",share=True)
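For reference, the deleted app above keeps one cached predictor per backbone/dataset pair and selects the segmentation mode purely through the task string passed to the OneFormer `DefaultPredictor`. A minimal sketch of that dispatch pattern; `TASK_PROMPTS` and `run_task` are names invented here for illustration, not part of the deleted file:

```python
# Illustrative sketch only; TASK_PROMPTS and run_task are hypothetical names.
TASK_PROMPTS = {
    "the task is panoptic": "panoptic",
    "the task is instance": "instance",
    "the task is semantic": "semantic",
}

def run_task(img, predictor, prompt):
    # The cached OneFormer predictor accepts (image, task) and returns the
    # prediction dict for that task; see panoptic_run/instance_run/semantic_run above.
    return predictor(img, TASK_PROMPTS[prompt])
```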
 
gradio_combine.py DELETED
@@ -1,384 +0,0 @@
1
- import torch
2
- import numpy as np
3
- from PIL import Image
4
- import cv2
5
- import imutils
6
- import os
7
- import sys
8
- import time
9
- import colorsys
10
- from scipy import ndimage
11
- import gradio as gr
12
- from huggingface_hub import hf_hub_download
13
-
14
- # Detectron2 imports
15
- from detectron2.config import get_cfg
16
- from detectron2.projects.deeplab import add_deeplab_config
17
- from detectron2.data import MetadataCatalog
18
- from detectron2.engine import DefaultPredictor as DetectronPredictor
19
- from detectron2 import model_zoo
20
-
21
- # OneFormer imports
22
- from oneformer import (
23
- add_oneformer_config,
24
- add_common_config,
25
- add_swin_config,
26
- add_dinat_config,
27
- )
28
- from demo.defaults import DefaultPredictor as OneFormerPredictor
29
- from demo.visualizer import Visualizer, ColorMode
30
-
31
- # NeuroNest contrast detection imports
32
- from utils.contrast_detector import ContrastDetector
33
- from utils.luminance_contrast import LuminanceContrastDetector
34
- from utils.hue_contrast import HueContrastDetector
35
- from utils.saturation_contrast import SaturationContrastDetector
36
- from utils.combined_contrast import CombinedContrastDetector
37
-
38
- # Set threads for CPU optimization
39
- torch.set_num_threads(4)
40
-
41
- ########################################
42
- # GLOBAL CONFIGURATIONS
43
- ########################################
44
-
45
- # OneFormer configurations
46
- KEY_DICT = {"ADE20K (150 classes)": "ade20k"}
47
-
48
- SWIN_CFG_DICT = {
49
- "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",
50
- }
51
-
52
- SWIN_MODEL_DICT = {
53
- "ade20k": hf_hub_download(
54
- repo_id="shi-labs/oneformer_ade20k_swin_large",
55
- filename="250_16_swin_l_oneformer_ade20k_160k.pth"
56
- )
57
- }
58
-
59
- DINAT_CFG_DICT = {
60
- "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",
61
- }
62
-
63
- DINAT_MODEL_DICT = {
64
- "ade20k": hf_hub_download(
65
- repo_id="shi-labs/oneformer_ade20k_dinat_large",
66
- filename="250_16_dinat_l_oneformer_ade20k_160k.pth"
67
- )
68
- }
69
-
70
- MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT, "Swin-L": SWIN_MODEL_DICT}
71
- CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT, "Swin-L": SWIN_CFG_DICT}
72
- WIDTH_DICT = {"ade20k": 640}
73
-
74
- # Contrast detector mapping
75
- CONTRAST_DETECTORS = {
76
- "Luminance (WCAG)": LuminanceContrastDetector(),
77
- "Hue": HueContrastDetector(),
78
- "Saturation": SaturationContrastDetector(),
79
- "Combined": CombinedContrastDetector()
80
- }
81
-
82
- # Device configuration
83
- cpu_device = torch.device("cpu")
84
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
85
- print(f"Using device: {device}")
86
-
87
- # Model storage
88
- ONEFORMER_PREDICTORS = {
89
- "DiNAT-L": {"ADE20K (150 classes)": None},
90
- "Swin-L": {"ADE20K (150 classes)": None}
91
- }
92
-
93
- ONEFORMER_METADATA = {
94
- "DiNAT-L": {"ADE20K (150 classes)": None},
95
- "Swin-L": {"ADE20K (150 classes)": None}
96
- }
97
-
98
- ########################################
99
- # MASK R-CNN SETUP AND FUNCTIONS
100
- ########################################
101
-
102
- def load_maskrcnn_model(weights_path, device="cuda", threshold=0.5):
103
- """Load Mask R-CNN model for blackspot detection"""
104
- cfg = get_cfg()
105
- cfg.merge_from_file(
106
- model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
107
- )
108
- cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2 # [Floors, blackspot]
109
- cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
110
- cfg.MODEL.WEIGHTS = weights_path
111
- # Fix: Convert torch.device to string
112
- cfg.MODEL.DEVICE = str(device) if isinstance(device, torch.device) else device
113
- return DetectronPredictor(cfg)
114
- def postprocess_blackspot_masks(im, instances, show_floor=True, show_blackspot=True):
115
- """Extract floor and blackspot masks from Mask R-CNN predictions"""
116
- height, width = im.shape[:2]
117
- pred_classes = instances.pred_classes.cpu().numpy()
118
- pred_masks = instances.pred_masks.cpu().numpy()
119
-
120
- combined_floor_mask = np.zeros((height, width), dtype=bool)
121
- final_blackspot = np.zeros((height, width), dtype=bool)
122
-
123
- for cls_id, mask in zip(pred_classes, pred_masks):
124
- if cls_id == 0 and show_floor: # Floor class
125
- combined_floor_mask |= mask
126
- elif cls_id == 1 and show_blackspot: # Blackspot class
127
- final_blackspot |= mask
128
-
129
- return combined_floor_mask.astype(np.uint8), final_blackspot.astype(np.uint8)
130
-
131
- ########################################
132
- # ONEFORMER SETUP AND FUNCTIONS
133
- ########################################
134
-
135
- def setup_oneformer_modules():
136
- """Initialize OneFormer models"""
137
- for dataset in ["ADE20K (150 classes)"]:
138
- for backbone in ["DiNAT-L", "Swin-L"]:
139
- cfg = setup_oneformer_cfg(dataset, backbone)
140
- metadata = MetadataCatalog.get(
141
- cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
142
- )
143
- ONEFORMER_PREDICTORS[backbone][dataset] = OneFormerPredictor(cfg)
144
- ONEFORMER_METADATA[backbone][dataset] = metadata
145
-
146
- def setup_oneformer_cfg(dataset, backbone):
147
- """Setup OneFormer configuration"""
148
- cfg = get_cfg()
149
- add_deeplab_config(cfg)
150
- add_common_config(cfg)
151
- add_swin_config(cfg)
152
- add_oneformer_config(cfg)
153
- add_dinat_config(cfg)
154
- dataset = KEY_DICT[dataset]
155
- cfg_path = CFG_DICT[backbone][dataset]
156
- cfg.merge_from_file(cfg_path)
157
- cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
158
- cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
159
- cfg.freeze()
160
- return cfg
161
-
162
- def semantic_run(img, predictor, metadata):
163
- """Run OneFormer semantic segmentation"""
164
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
165
- predictions = predictor(img, "semantic")
166
- out = visualizer.draw_sem_seg(
167
- predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
168
- )
169
- return out, predictions["sem_seg"].argmax(dim=0).to(cpu_device).numpy()
170
-
171
- ########################################
172
- # INTEGRATED ANALYSIS FUNCTION
173
- ########################################
174
-
175
- def integrated_analysis(image_path,
176
- # Blackspot detection parameters
177
- blackspot_threshold, show_floor, show_blackspot,
178
- # Contrast detection parameters
179
- enable_contrast, backbone, contrast_method, contrast_threshold):
180
- """
181
- Perform integrated analysis with both blackspot detection and contrast analysis
182
- """
183
- # Read the image
184
- im = cv2.imread(image_path)
185
- if im is None:
186
- return "Error: could not read image!", None, None, None
187
-
188
- # Resize for OneFormer if contrast analysis is enabled
189
- if enable_contrast:
190
- width = WIDTH_DICT["ade20k"]
191
- im_resized = imutils.resize(im, width=width)
192
- else:
193
- im_resized = im
194
-
195
- # Part 1: Blackspot Detection with Mask R-CNN
196
- blackspot_text = []
197
- blackspot_viz = None
198
-
199
- if show_floor or show_blackspot:
200
- weights_path = "./output_floor_blackspot/model_0004999.pth"
201
- maskrcnn_predictor = load_maskrcnn_model(weights_path, device, blackspot_threshold)
202
-
203
- # Run blackspot detection
204
- outputs = maskrcnn_predictor(im)
205
- instances = outputs["instances"]
206
-
207
- # Post-process masks
208
- floor_mask, blackspot_mask = postprocess_blackspot_masks(im, instances, show_floor, show_blackspot)
209
-
210
- # Create visualization
211
- blackspot_overlay = im.copy()
212
- overlay = np.zeros_like(im)
213
-
214
- if show_floor:
215
- overlay[floor_mask > 0] = (0, 255, 0) # Green for floor
216
- if show_blackspot:
217
- overlay[blackspot_mask > 0] = (0, 0, 255) # Red for blackspot
218
-
219
- blackspot_overlay = cv2.addWeighted(im, 1.0, overlay, 0.5, 0)
220
- blackspot_viz = Image.fromarray(cv2.cvtColor(blackspot_overlay, cv2.COLOR_BGR2RGB))
221
-
222
- # Calculate statistics
223
- blackspot_area = int(blackspot_mask.sum())
224
- floor_area = int(floor_mask.sum())
225
-
226
- blackspot_text.append(f"### Blackspot Detection Results")
227
- blackspot_text.append(f"**Threshold:** {blackspot_threshold:.2f}")
228
-
229
- if show_floor:
230
- blackspot_text.append(f"**Floor area:** {floor_area} pixels")
231
- if show_blackspot:
232
- blackspot_text.append(f"**Blackspot area:** {blackspot_area} pixels")
233
- if floor_area > 0 and show_floor:
234
- percentage = (blackspot_area / floor_area) * 100
235
- blackspot_text.append(f"**Blackspot coverage:** {percentage:.2f}% of floor area")
236
-
237
- # Part 2: Contrast Analysis with OneFormer
238
- segmentation_viz = None
239
- contrast_viz = None
240
- contrast_text = []
241
-
242
- if enable_contrast:
243
- dataset = "ADE20K (150 classes)"
244
- predictor = ONEFORMER_PREDICTORS[backbone][dataset]
245
- metadata = ONEFORMER_METADATA[backbone][dataset]
246
-
247
- # Get segmentation
248
- out, seg_mask = semantic_run(im_resized, predictor, metadata)
249
- segmentation_viz = Image.fromarray(out.get_image())
250
-
251
- # Analyze contrast
252
- img_rgb = cv2.cvtColor(im_resized, cv2.COLOR_BGR2RGB)
253
- detector = CONTRAST_DETECTORS[contrast_method]
254
- contrast_image, problem_areas, stats = detector.analyze(
255
- img_rgb, seg_mask, contrast_threshold
256
- )
257
-
258
- contrast_viz = Image.fromarray(contrast_image)
259
-
260
- # Create stats text
261
- contrast_text.append(f"### Contrast Analysis Results")
262
- contrast_text.append(f"**Method:** {contrast_method}")
263
- contrast_text.append(f"**Threshold:** {contrast_threshold:.2f}")
264
- contrast_text.append(f"**Problem Areas:** {stats['problem_count']}")
265
-
266
- if 'min_contrast' in stats:
267
- contrast_text.append(f"**Min Contrast:** {stats['min_contrast']:.2f}")
268
- if 'max_contrast' in stats:
269
- contrast_text.append(f"**Max Contrast:** {stats['max_contrast']:.2f}")
270
- if 'average_contrast' in stats:
271
- contrast_text.append(f"**Average Contrast:** {stats['average_contrast']:.2f}")
272
-
273
- # Combine results
274
- combined_text = []
275
- if blackspot_text:
276
- combined_text.extend(blackspot_text)
277
- if contrast_text:
278
- if blackspot_text:
279
- combined_text.append("\n")
280
- combined_text.extend(contrast_text)
281
-
282
- return "\n".join(combined_text), blackspot_viz, segmentation_viz, contrast_viz
283
-
284
- ########################################
285
- # GRADIO INTERFACE
286
- ########################################
287
-
288
- # Initialize models
289
- print("Initializing OneFormer models...")
290
- setup_oneformer_modules()
291
-
292
- title = "NeuroNest: Integrated Blackspot & Contrast Detection"
293
- description = """
294
- This integrated system combines:
295
- 1. **Blackspot Detection**: Uses Mask R-CNN to detect blackspots on floors
296
- 2. **Contrast Analysis**: Uses OneFormer segmentation to analyze contrast between objects
297
-
298
- Both analyses help identify potential accessibility issues for individuals with Alzheimer's disease.
299
- """
300
-
301
- # Create the Gradio interface
302
- with gr.Blocks(title=title) as demo:
303
- gr.Markdown(f"# {title}")
304
- gr.Markdown(description)
305
-
306
- with gr.Row():
307
- with gr.Column(scale=1):
308
- # Input image
309
- image_input = gr.Image(label="Input Image", type="filepath")
310
-
311
- # Blackspot detection controls
312
- with gr.Accordion("Blackspot Detection Settings", open=True):
313
- blackspot_threshold = gr.Slider(
314
- minimum=0.1, maximum=0.9, value=0.5, step=0.05,
315
- label="Blackspot Detection Threshold"
316
- )
317
- with gr.Row():
318
- show_floor = gr.Checkbox(value=True, label="Show Floor")
319
- show_blackspot = gr.Checkbox(value=True, label="Show Blackspots")
320
-
321
- # Contrast analysis controls
322
- with gr.Accordion("Contrast Analysis Settings", open=True):
323
- enable_contrast = gr.Checkbox(value=True, label="Enable Contrast Analysis")
324
- backbone = gr.Radio(
325
- choices=["Swin-L", "DiNAT-L"],
326
- value="Swin-L",
327
- label="OneFormer Backbone"
328
- )
329
- contrast_method = gr.Radio(
330
- choices=["Luminance (WCAG)", "Hue", "Saturation", "Combined"],
331
- value="Luminance (WCAG)",
332
- label="Contrast Detection Method"
333
- )
334
- contrast_threshold = gr.Slider(
335
- minimum=1.0, maximum=10.0, value=4.5, step=0.1,
336
- label="Contrast Threshold"
337
- )
338
-
339
- analyze_btn = gr.Button("Analyze", variant="primary")
340
-
341
- with gr.Column(scale=2):
342
- # Output displays
343
- with gr.Tabs():
344
- with gr.Tab("Analysis Report"):
345
- analysis_text = gr.Textbox(label="Analysis Results", lines=10)
346
-
347
- with gr.Tab("Blackspot Detection"):
348
- blackspot_output = gr.Image(label="Blackspot Visualization")
349
-
350
- with gr.Tab("Segmentation"):
351
- segmentation_output = gr.Image(label="OneFormer Segmentation")
352
-
353
- with gr.Tab("Contrast Analysis"):
354
- contrast_output = gr.Image(label="Contrast Visualization")
355
-
356
- # Connect the interface
357
- analyze_btn.click(
358
- fn=integrated_analysis,
359
- inputs=[
360
- image_input,
361
- blackspot_threshold, show_floor, show_blackspot,
362
- enable_contrast, backbone, contrast_method, contrast_threshold
363
- ],
364
- outputs=[
365
- analysis_text, blackspot_output, segmentation_output, contrast_output
366
- ]
367
- )
368
-
369
- # Examples
370
- gr.Examples(
371
- examples=[
372
- ["examples/indoor_room.jpg", 0.5, True, True, True, "Swin-L", "Luminance (WCAG)", 4.5],
373
- ["examples/living_room.jpg", 0.7, True, True, True, "DiNAT-L", "Combined", 3.0],
374
- ],
375
- inputs=[
376
- image_input,
377
- blackspot_threshold, show_floor, show_blackspot,
378
- enable_contrast, backbone, contrast_method, contrast_threshold
379
- ]
380
- )
381
-
382
- if __name__ == "__main__":
383
- print(f"Launching integrated NeuroNest app on device: {device}")
384
- demo.queue().launch(server_name="0.0.0.0", share=True)
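For reference, the blackspot-coverage statistic that `integrated_analysis` reports above reduces to a ratio of pixel counts over the two boolean masks returned by `postprocess_blackspot_masks`. A minimal standalone sketch; the helper name `blackspot_coverage` is invented here:

```python
import numpy as np

def blackspot_coverage(floor_mask: np.ndarray, blackspot_mask: np.ndarray) -> float:
    """Blackspot area as a percentage of detected floor area (0.0 if no floor found)."""
    floor_area = int(floor_mask.astype(bool).sum())
    blackspot_area = int(blackspot_mask.astype(bool).sum())
    return 100.0 * blackspot_area / floor_area if floor_area else 0.0
```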
 
gradio_contrast.py DELETED
@@ -1,455 +0,0 @@
1
- import torch
2
- import os
3
- import sys
4
- import time
5
- import numpy as np
6
- from PIL import Image
7
- import cv2
8
- import imutils
9
- import colorsys
10
- from scipy import ndimage
11
-
12
- # Set CUDA device explicitly at the start
13
- if torch.cuda.is_available():
14
- torch.cuda.set_device(0) # Use first GPU
15
- print(f"Using GPU: {torch.cuda.get_device_name(0)}")
16
- else:
17
- print("WARNING: No GPU available, using CPU")
18
-
19
- print("Installed the dependencies!")
20
-
21
- from detectron2.config import get_cfg
22
- from detectron2.projects.deeplab import add_deeplab_config
23
- from detectron2.data import MetadataCatalog
24
-
25
- from oneformer import (
26
- add_oneformer_config,
27
- add_common_config,
28
- add_swin_config,
29
- add_dinat_config,
30
- )
31
-
32
- from demo.defaults import DefaultPredictor
33
- from demo.visualizer import Visualizer, ColorMode
34
-
35
- import gradio as gr
36
- from huggingface_hub import hf_hub_download
37
-
38
- # Force unbuffered output for SLURM logs
39
- sys.stdout = sys.__stdout__
40
- sys.stderr = sys.__stderr__
41
-
42
- # Set environment variables for better GPU performance
43
- os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
44
- os.environ['TORCH_USE_CUDA_DSA'] = '1'
45
-
46
- # Contrast Detection Classes
47
- class ContrastDetector:
48
- """Base class for contrast detection between segments"""
49
-
50
- @staticmethod
51
- def calculate_luminance_contrast(color1, color2):
52
- """Calculate WCAG luminance contrast ratio"""
53
- def get_relative_luminance(rgb):
54
- r, g, b = [val/255.0 for val in rgb]
55
- r = r/12.92 if r <= 0.03928 else ((r + 0.055)/1.055) ** 2.4
56
- g = g/12.92 if g <= 0.03928 else ((g + 0.055)/1.055) ** 2.4
57
- b = b/12.92 if b <= 0.03928 else ((b + 0.055)/1.055) ** 2.4
58
- return 0.2126 * r + 0.7152 * g + 0.0722 * b
59
-
60
- lum1 = get_relative_luminance(color1)
61
- lum2 = get_relative_luminance(color2)
62
-
63
- lighter = max(lum1, lum2)
64
- darker = min(lum1, lum2)
65
-
66
- return (lighter + 0.05) / (darker + 0.05)
67
-
68
- @staticmethod
69
- def calculate_hue_contrast(color1, color2):
70
- """Calculate hue difference between two colors"""
71
- hsv1 = colorsys.rgb_to_hsv(color1[0]/255.0, color1[1]/255.0, color1[2]/255.0)
72
- hsv2 = colorsys.rgb_to_hsv(color2[0]/255.0, color2[1]/255.0, color2[2]/255.0)
73
-
74
- hue_diff = abs(hsv1[0] - hsv2[0])
75
- if hue_diff > 0.5:
76
- hue_diff = 1 - hue_diff
77
-
78
- return hue_diff * 2
79
-
80
- @staticmethod
81
- def calculate_saturation_contrast(color1, color2):
82
- """Calculate saturation difference between two colors"""
83
- hsv1 = colorsys.rgb_to_hsv(color1[0]/255.0, color1[1]/255.0, color1[2]/255.0)
84
- hsv2 = colorsys.rgb_to_hsv(color2[0]/255.0, color2[1]/255.0, color2[2]/255.0)
85
-
86
- return abs(hsv1[1] - hsv2[1])
87
-
88
- @staticmethod
89
- def analyze_contrast(image, segmentation, method="luminance", threshold=4.5):
90
- """Analyze contrast between adjacent segments"""
91
- unique_segments = np.unique(segmentation)
92
- h, w = segmentation.shape
93
- contrast_mask = np.zeros((h, w), dtype=bool)
94
- problem_areas = []
95
-
96
- # Calculate average colors for each segment
97
- segment_colors = {}
98
- for seg_id in unique_segments:
99
- mask = segmentation == seg_id
100
- if np.any(mask):
101
- segment_colors[seg_id] = np.mean(image[mask], axis=0).astype(int)
102
-
103
- # Check contrast between adjacent segments
104
- for i in range(h):
105
- for j in range(w):
106
- current_seg = segmentation[i, j]
107
-
108
- # Check 4-connected neighbors
109
- for di, dj in [(0, 1), (1, 0), (0, -1), (-1, 0)]:
110
- ni, nj = i + di, j + dj
111
- if 0 <= ni < h and 0 <= nj < w:
112
- neighbor_seg = segmentation[ni, nj]
113
-
114
- if current_seg != neighbor_seg:
115
- color1 = segment_colors[current_seg]
116
- color2 = segment_colors[neighbor_seg]
117
-
118
- if method == "luminance":
119
- contrast = ContrastDetector.calculate_luminance_contrast(color1, color2)
120
- elif method == "hue":
121
- contrast = ContrastDetector.calculate_hue_contrast(color1, color2)
122
- threshold = 0.3 # Adjust threshold for hue
123
- elif method == "saturation":
124
- contrast = ContrastDetector.calculate_saturation_contrast(color1, color2)
125
- threshold = 0.3 # Adjust threshold for saturation
126
-
127
- if contrast < threshold:
128
- contrast_mask[i, j] = True
129
- problem_areas.append((current_seg, neighbor_seg, contrast))
130
-
131
- return contrast_mask, problem_areas, segment_colors
132
-
133
- # Rest of your code remains the same until setup_cfg function
134
- KEY_DICT = {"Cityscapes (19 classes)": "cityscapes",
135
- "COCO (133 classes)": "coco",
136
- "ADE20K (150 classes)": "ade20k",}
137
-
138
- SWIN_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_swin_large_IN21k_384_bs16_90k.yaml",
139
- "coco": "configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml",
140
- "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",}
141
-
142
- SWIN_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_swin_large",
143
- filename="250_16_swin_l_oneformer_cityscapes_90k.pth"),
144
- "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_swin_large",
145
- filename="150_16_swin_l_oneformer_coco_100ep.pth"),
146
- "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_swin_large",
147
- filename="250_16_swin_l_oneformer_ade20k_160k.pth")
148
- }
149
-
150
- DINAT_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_dinat_large_bs16_90k.yaml",
151
- "coco": "configs/coco/oneformer_dinat_large_bs16_100ep.yaml",
152
- "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",}
153
-
154
- DINAT_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_dinat_large",
155
- filename="250_16_dinat_l_oneformer_cityscapes_90k.pth"),
156
- "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_dinat_large",
157
- filename="150_16_dinat_l_oneformer_coco_100ep.pth"),
158
- "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_dinat_large",
159
- filename="250_16_dinat_l_oneformer_ade20k_160k.pth")
160
- }
161
-
162
- MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT,
163
- "Swin-L": SWIN_MODEL_DICT }
164
-
165
- CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT,
166
- "Swin-L": SWIN_CFG_DICT }
167
-
168
- WIDTH_DICT = {"cityscapes": 512,
169
- "coco": 512,
170
- "ade20k": 640}
171
-
172
- # Modified to ensure CUDA device
173
- if torch.cuda.is_available():
174
- device = torch.device("cuda:0")
175
- print(f"Using device: {device}")
176
- else:
177
- device = torch.device("cpu")
178
- print(f"WARNING: Using CPU device")
179
-
180
- cpu_device = torch.device("cpu")
181
-
182
- PREDICTORS = {
183
- "DiNAT-L": {
184
- "Cityscapes (19 classes)": None,
185
- "COCO (133 classes)": None,
186
- "ADE20K (150 classes)": None
187
- },
188
- "Swin-L": {
189
- "Cityscapes (19 classes)": None,
190
- "COCO (133 classes)": None,
191
- "ADE20K (150 classes)": None
192
- }
193
- }
194
-
195
- METADATA = {
196
- "DiNAT-L": {
197
- "Cityscapes (19 classes)": None,
198
- "COCO (133 classes)": None,
199
- "ADE20K (150 classes)": None
200
- },
201
- "Swin-L": {
202
- "Cityscapes (19 classes)": None,
203
- "COCO (133 classes)": None,
204
- "ADE20K (150 classes)": None
205
- }
206
- }
207
-
208
- def setup_modules():
209
- print("Setting up modules...")
210
- for dataset in ["Cityscapes (19 classes)", "COCO (133 classes)", "ADE20K (150 classes)"]:
211
- for backbone in ["DiNAT-L", "Swin-L"]:
212
- print(f"Loading {backbone} for {dataset}...")
213
- cfg = setup_cfg(dataset, backbone)
214
- metadata = MetadataCatalog.get(
215
- cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
216
- )
217
- if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST_PANOPTIC[0]:
218
- from cityscapesscripts.helpers.labels import labels
219
- stuff_colors = [k.color for k in labels if k.trainId != 255]
220
- metadata = metadata.set(stuff_colors=stuff_colors)
221
-
222
- # Create predictor with explicit device
223
- predictor = DefaultPredictor(cfg)
224
- predictor.model.to(device)
225
-
226
- PREDICTORS[backbone][dataset] = predictor
227
- METADATA[backbone][dataset] = metadata
228
- print(f"✓ Loaded {backbone} for {dataset}")
229
- print("All modules setup complete!")
230
-
231
- def setup_cfg(dataset, backbone):
232
- # load config from file and command-line arguments
233
- cfg = get_cfg()
234
- add_deeplab_config(cfg)
235
- add_common_config(cfg)
236
- add_swin_config(cfg)
237
- add_oneformer_config(cfg)
238
- add_dinat_config(cfg)
239
- dataset = KEY_DICT[dataset]
240
- cfg_path = CFG_DICT[backbone][dataset]
241
- cfg.merge_from_file(cfg_path)
242
-
243
- # Explicitly set device to CUDA if available
244
- if torch.cuda.is_available():
245
- cfg.MODEL.DEVICE = 'cuda:0'
246
- print(f"Config set to use CUDA device")
247
- else:
248
- cfg.MODEL.DEVICE = 'cpu'
249
- print(f"Config set to use CPU device")
250
-
251
- cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
252
- cfg.freeze()
253
- return cfg
254
-
255
- # Rest of your functions remain the same
256
- def panoptic_run(img, predictor, metadata):
257
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
258
- predictions = predictor(img, "panoptic")
259
- panoptic_seg, segments_info = predictions["panoptic_seg"]
260
- out = visualizer.draw_panoptic_seg_predictions(
261
- panoptic_seg.to(cpu_device), segments_info, alpha=0.5
262
- )
263
- visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
264
- out_map = visualizer_map.draw_panoptic_seg_predictions(
265
- panoptic_seg.to(cpu_device), segments_info, alpha=1, is_text=False
266
- )
267
- return out, out_map, predictions
268
-
269
- def instance_run(img, predictor, metadata):
270
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
271
- predictions = predictor(img, "instance")
272
- instances = predictions["instances"].to(cpu_device)
273
- out = visualizer.draw_instance_predictions(predictions=instances, alpha=0.5)
274
- visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
275
- out_map = visualizer_map.draw_instance_predictions(predictions=instances, alpha=1, is_text=False)
276
- return out, out_map, predictions
277
-
278
- def semantic_run(img, predictor, metadata):
279
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
280
- predictions = predictor(img, "semantic")
281
- out = visualizer.draw_sem_seg(
282
- predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
283
- )
284
- visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=metadata, instance_mode=ColorMode.IMAGE)
285
- out_map = visualizer_map.draw_sem_seg(
286
- predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=1, is_text=False
287
- )
288
- return out, out_map, predictions
289
-
290
- TASK_INFER = {"the task is panoptic": panoptic_run, "the task is instance": instance_run, "the task is semantic": semantic_run}
291
-
292
- def create_contrast_visualization(img, contrast_mask, problem_areas, segment_colors):
293
- """Create visualization of contrast issues"""
294
- # Copy original image
295
- contrast_viz = img.copy()
296
-
297
- # Highlight low contrast boundaries
298
- boundary_color = (255, 0, 0) # Red for problem areas
299
- contrast_viz[contrast_mask] = boundary_color
300
-
301
- # Add information overlay
302
- info_text = f"Low contrast areas detected: {len(problem_areas)}"
303
- cv2.putText(contrast_viz, info_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
304
-
305
- return contrast_viz
306
-
307
- def segment_and_analyze(path, task, dataset, backbone, enable_contrast, contrast_method, contrast_threshold):
308
- # Get predictions and segmentation visualization
309
- predictor = PREDICTORS[backbone][dataset]
310
- metadata = METADATA[backbone][dataset]
311
- img = cv2.imread(path)
312
- width = WIDTH_DICT[KEY_DICT[dataset]]
313
- img = imutils.resize(img, width=width)
314
-
315
- out, out_map, predictions = TASK_INFER[task](img, predictor, metadata)
316
- out_img = Image.fromarray(out.get_image())
317
- out_map_img = Image.fromarray(out_map.get_image())
318
-
319
- if not enable_contrast:
320
- return out_img, out_map_img, None, None
321
-
322
- # Extract segmentation mask from predictions
323
- if task == "the task is semantic":
324
- seg_mask = predictions["sem_seg"].argmax(dim=0).cpu().numpy()
325
- elif task == "the task is panoptic":
326
- seg_mask, _ = predictions["panoptic_seg"]
327
- seg_mask = seg_mask.cpu().numpy()
328
- elif task == "the task is instance":
329
- # For instance segmentation, create a mask from instances
330
- instances = predictions["instances"].to("cpu")
331
- seg_mask = np.zeros(img.shape[:2], dtype=np.int32)
332
- for i, mask in enumerate(instances.pred_masks):
333
- seg_mask[mask] = i + 1
334
-
335
- # Analyze contrast
336
- contrast_mask, problem_areas, segment_colors = ContrastDetector.analyze_contrast(
337
- img, seg_mask, method=contrast_method, threshold=contrast_threshold
338
- )
339
-
340
- # Create contrast visualization
341
- contrast_viz = create_contrast_visualization(img, contrast_mask, problem_areas, segment_colors)
342
- contrast_viz_img = Image.fromarray(contrast_viz[:, :, ::-1]) # Convert BGR to RGB
343
-
344
- # Generate analysis report
345
- report = f"### Contrast Analysis Report\n\n"
346
- report += f"**Method:** {contrast_method.capitalize()}\n"
347
- report += f"**Threshold:** {contrast_threshold}\n"
348
- report += f"**Total segments:** {len(segment_colors)}\n"
349
- report += f"**Low contrast boundaries found:** {len(problem_areas)}\n\n"
350
-
351
- if problem_areas:
352
- report += "**Problem Areas:**\n"
353
- for i, (seg1, seg2, contrast_value) in enumerate(problem_areas[:10]): # Show first 10
354
- report += f"- Segments {seg1} and {seg2}: Contrast ratio = {contrast_value:.2f}\n"
355
- if len(problem_areas) > 10:
356
- report += f"... and {len(problem_areas) - 10} more\n"
357
-
358
- return out_img, out_map_img, contrast_viz_img, report
359
-
360
- title = "<h1 style='text-align: center'>OneFormer:DIEGO MENTORIA MILIONÁRIA - APP 1</h1>"
361
- description = "<p style='font-size: 14px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://github.com/lolout1/sam2Contrast' style='text-decoration:none' target='_blank'>NeuroNest Contrast Model</a></p>" \
362
- + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/oneformer/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/2211.06220' target='_blank'>ArXiv Paper</a> | <a href='https://github.com/SHI-Labs/OneFormer' target='_blank'>Github Repo</a></p>" \
363
- + "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'> \
364
- This model leverages the OneFormer architecture to perform comprehensive image segmentation and labeling across multiple tasks. The system can identify and segment various objects, structures, and regions within images with high accuracy. It supports semantic, instance, and panoptic segmentation modes, enabling detailed analysis of indoor and outdoor environments. The model excels at distinguishing between different classes of objects, from common everyday items to complex urban structures, making it particularly useful for environmental analysis and scene understanding applications.\
365
- </p>" \
366
- + "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: w300;'> [Note: Inference on CPU may take upto 2 minutes. On a single RTX A6000 GPU, OneFormer is able to inference at more than 15 FPS.]</p>"
367
-
368
- # Main execution with error handling
369
- if __name__ == "__main__":
370
- try:
371
- print("Starting setup...")
372
- setup_modules()
373
-
374
- print("Creating Gradio interface...")
375
- with gr.Blocks(title="OneFormer with Contrast Detection") as iface:
376
- gr.Markdown(title)
377
- gr.Markdown(description)
378
-
379
- with gr.Row():
380
- with gr.Column(scale=1):
381
- input_image = gr.Image(label="Input Image", type="filepath")
382
- task = gr.Radio(
383
- choices=["the task is panoptic", "the task is instance", "the task is semantic"],
384
- value="the task is panoptic",
385
- label="Task Token Input"
386
- )
387
- dataset = gr.Radio(
388
- choices=["COCO (133 classes)", "Cityscapes (19 classes)", "ADE20K (150 classes)"],
389
- value="COCO (133 classes)",
390
- label="Model"
391
- )
392
- backbone = gr.Radio(
393
- choices=["DiNAT-L", "Swin-L"],
394
- value="DiNAT-L",
395
- label="Backbone"
396
- )
397
-
398
- with gr.Accordion("Contrast Detection Options", open=False):
399
- enable_contrast = gr.Checkbox(
400
- label="Enable Contrast Detection",
401
- value=False
402
- )
403
- contrast_method = gr.Radio(
404
- choices=["luminance", "hue", "saturation"],
405
- value="luminance",
406
- label="Contrast Method"
407
- )
408
- contrast_threshold = gr.Slider(
409
- minimum=1.0,
410
- maximum=10.0,
411
- value=4.5,
412
- step=0.1,
413
- label="Contrast Threshold (WCAG AA is 4.5)"
414
- )
415
-
416
- submit_btn = gr.Button("Analyze", variant="primary")
417
-
418
- with gr.Column(scale=2):
419
- with gr.Tabs():
420
- with gr.TabItem("Segmentation Results"):
421
- seg_output = gr.Image(type="pil", label="Segmentation Overlay")
422
- seg_map = gr.Image(type="pil", label="Segmentation Map")
423
-
424
- with gr.TabItem("Contrast Analysis"):
425
- contrast_viz = gr.Image(type="pil", label="Contrast Visualization")
426
- contrast_report = gr.Markdown(label="Contrast Analysis Report")
427
-
428
- examples = [
429
- ["examples/coco.jpeg", "the task is panoptic", "COCO (133 classes)", "DiNAT-L", False, "luminance", 4.5],
430
- ["examples/cityscapes.png", "the task is panoptic", "Cityscapes (19 classes)", "DiNAT-L", False, "luminance", 4.5],
431
- ["examples/ade20k.jpeg", "the task is panoptic", "ADE20K (150 classes)", "DiNAT-L", False, "luminance", 4.5]
432
- ]
433
-
434
- gr.Examples(
435
- examples=examples,
436
- inputs=[input_image, task, dataset, backbone, enable_contrast, contrast_method, contrast_threshold],
437
- outputs=[seg_output, seg_map, contrast_viz, contrast_report],
438
- fn=segment_and_analyze,
439
- cache_examples=False
440
- )
441
-
442
- submit_btn.click(
443
- fn=segment_and_analyze,
444
- inputs=[input_image, task, dataset, backbone, enable_contrast, contrast_method, contrast_threshold],
445
- outputs=[seg_output, seg_map, contrast_viz, contrast_report]
446
- )
447
-
448
- print("Launching Gradio app...")
449
- iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
450
-
451
- except Exception as e:
452
- print(f"Error occurred: {str(e)}")
453
- import traceback
454
- traceback.print_exc()
455
- sys.exit(1)
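The `ContrastDetector.calculate_luminance_contrast` method deleted above implements the WCAG 2.x contrast ratio, and the slider default of 4.5 matches the WCAG AA threshold for normal text. A standalone sketch of the same formula with a quick sanity check:

```python
def relative_luminance(rgb):
    # sRGB channel linearization followed by Rec. 709 luma weights, as in WCAG 2.x.
    def lin(c):
        c = c / 255.0
        return c / 12.92 if c <= 0.03928 else ((c + 0.055) / 1.055) ** 2.4
    r, g, b = (lin(v) for v in rgb)
    return 0.2126 * r + 0.7152 * g + 0.0722 * b

def contrast_ratio(color1, color2):
    l1, l2 = relative_luminance(color1), relative_luminance(color2)
    lighter, darker = max(l1, l2), min(l1, l2)
    return (lighter + 0.05) / (darker + 0.05)

print(round(contrast_ratio((255, 255, 255), (0, 0, 0)), 1))  # 21.0, the maximum possible ratio
```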
 
gradio_gpu.py DELETED
@@ -1,234 +0,0 @@
1
- import torch
2
- import numpy as np
3
- from PIL import Image
4
- import cv2
5
- import imutils
6
- import os
7
- import sys
8
- import time
9
- from detectron2.config import get_cfg
10
- from detectron2.projects.deeplab import add_deeplab_config
11
- from detectron2.data import MetadataCatalog
12
- from scipy import ndimage
13
- import colorsys
14
- import math
15
-
16
- torch.set_num_threads(16)
17
- torch.set_num_interop_threads(16)
18
-
19
- from oneformer import (
20
- add_oneformer_config,
21
- add_common_config,
22
- add_swin_config,
23
- add_dinat_config,
24
- )
25
-
26
- from demo.defaults import DefaultPredictor
27
- from demo.visualizer import Visualizer, ColorMode
28
-
29
- import gradio as gr
30
- from huggingface_hub import hf_hub_download
31
-
32
- # NeuroNest specific imports
33
- from utils.contrast_detector import ContrastDetector
34
- from utils.luminance_contrast import LuminanceContrastDetector
35
- from utils.hue_contrast import HueContrastDetector
36
- from utils.saturation_contrast import SaturationContrastDetector
37
- from utils.combined_contrast import CombinedContrastDetector
38
-
39
- KEY_DICT = {
40
- "ADE20K (150 classes)": "ade20k",
41
- }
42
-
43
- SWIN_CFG_DICT = {
44
- "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",
45
- }
46
-
47
- SWIN_MODEL_DICT = {
48
- "ade20k": hf_hub_download(
49
- repo_id="shi-labs/oneformer_ade20k_swin_large",
50
- filename="250_16_swin_l_oneformer_ade20k_160k.pth"
51
- )
52
- }
53
-
54
- DINAT_CFG_DICT = {
55
- "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",
56
- }
57
-
58
- DINAT_MODEL_DICT = {
59
- "ade20k": hf_hub_download(
60
- repo_id="shi-labs/oneformer_ade20k_dinat_large",
61
- filename="250_16_dinat_l_oneformer_ade20k_160k.pth"
62
- )
63
- }
64
-
65
- MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT, "Swin-L": SWIN_MODEL_DICT}
66
- CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT, "Swin-L": SWIN_CFG_DICT}
67
- WIDTH_DICT = {"ade20k": 640}
68
-
69
- cpu_device = torch.device("cpu")
70
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
71
- print(f"Using device: {device}")
72
-
73
- PREDICTORS = {
74
- "DiNAT-L": {"ADE20K (150 classes)": None},
75
- "Swin-L": {"ADE20K (150 classes)": None}
76
- }
77
-
78
- METADATA = {
79
- "DiNAT-L": {"ADE20K (150 classes)": None},
80
- "Swin-L": {"ADE20K (150 classes)": None}
81
- }
82
-
83
- # Contrast detector mapping
84
- CONTRAST_DETECTORS = {
85
- "Luminance (WCAG)": LuminanceContrastDetector(),
86
- "Hue": HueContrastDetector(),
87
- "Saturation": SaturationContrastDetector(),
88
- "Combined": CombinedContrastDetector()
89
- }
90
-
91
- def setup_modules():
92
- for dataset in ["ADE20K (150 classes)"]:
93
- for backbone in ["DiNAT-L", "Swin-L"]:
94
- cfg = setup_cfg(dataset, backbone)
95
- metadata = MetadataCatalog.get(
96
- cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused"
97
- )
98
- PREDICTORS[backbone][dataset] = DefaultPredictor(cfg)
99
- METADATA[backbone][dataset] = metadata
100
-
101
- def setup_cfg(dataset, backbone):
102
- cfg = get_cfg()
103
- add_deeplab_config(cfg)
104
- add_common_config(cfg)
105
- add_swin_config(cfg)
106
- add_oneformer_config(cfg)
107
- add_dinat_config(cfg)
108
- dataset = KEY_DICT[dataset]
109
- cfg_path = CFG_DICT[backbone][dataset]
110
- cfg.merge_from_file(cfg_path)
111
- cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
112
- cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
113
- cfg.freeze()
114
- return cfg
115
-
116
- def semantic_run(img, predictor, metadata):
117
- visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
118
- predictions = predictor(img, "semantic")
119
- out = visualizer.draw_sem_seg(
120
- predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5
121
- )
122
- return out, predictions["sem_seg"].argmax(dim=0).to(cpu_device).numpy()
123
-
124
- def analyze_contrast(image, segmentation, contrast_method, threshold):
125
- """Analyze contrast between segments using selected method"""
126
- detector = CONTRAST_DETECTORS[contrast_method]
127
-
128
- # Perform contrast analysis
129
- contrast_image, problem_areas, stats = detector.analyze(
130
- image, segmentation, threshold
131
- )
132
-
133
- return contrast_image, problem_areas, stats
134
-
135
- def segment_and_analyze_contrast(path, backbone, contrast_method, threshold):
136
- """Main function to segment and analyze contrast"""
137
- dataset = "ADE20K (150 classes)"
138
- predictor = PREDICTORS[backbone][dataset]
139
- metadata = METADATA[backbone][dataset]
140
-
141
- # Read and resize image
142
- img = cv2.imread(path)
143
- if img is None:
144
- return None, None, "Error: Could not load image"
145
-
146
- width = WIDTH_DICT[KEY_DICT[dataset]]
147
- img = imutils.resize(img, width=width)
148
-
149
- # Get segmentation
150
- out, seg_mask = semantic_run(img, predictor, metadata)
151
- out_img = Image.fromarray(out.get_image())
152
-
153
- # Analyze contrast
154
- img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
155
- contrast_img, problem_areas, stats = analyze_contrast(
156
- img_rgb, seg_mask, contrast_method, threshold
157
- )
158
-
159
- # Create stats text
160
- stats_text = f"### Contrast Analysis Results\n\n"
161
- stats_text += f"**Method:** {contrast_method}\n"
162
- stats_text += f"**Threshold:** {threshold:.2f}\n"
163
- stats_text += f"**Problem Areas:** {stats['problem_count']}\n"
164
-
165
- if 'min_contrast' in stats:
166
- stats_text += f"**Min Contrast:** {stats['min_contrast']:.2f}\n"
167
- if 'max_contrast' in stats:
168
- stats_text += f"**Max Contrast:** {stats['max_contrast']:.2f}\n"
169
- if 'average_contrast' in stats:
170
- stats_text += f"**Average Contrast:** {stats['average_contrast']:.2f}\n"
171
-
172
- # Convert contrast image to PIL
173
- contrast_pil = Image.fromarray(contrast_img)
174
-
175
- return out_img, contrast_pil, stats_text
176
-
177
- # Initialize models
178
- setup_modules()
179
-
180
- # Gradio Interface
181
- title = "<h1 style='text-align: center'>NeuroNest: Abheek Pradhan - Contrast Model</h1>"
182
- description = "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> "\
183
- "<a href='https://github.com/lolout1/sam2Contrast' target='_blank'>Github Repo</a></p>" \
184
- "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'>" \
185
- "I am developing NeuroNest, a contrast detection system designed to identify areas with insufficient contrast " \
186
- "for individuals with Alzheimer's disease. This tool leverages OneFormer's state-of-the-art segmentation " \
187
- "capabilities trained on ADE20K dataset to detect indoor objects like floors, furniture, walls, and ceilings. " \
188
- "By analyzing contrast ratios between adjacent segments, NeuroNest flags potential visual accessibility issues " \
189
- "that may trigger confusion or disorientation in elderly individuals with cognitive impairments.</p>" \
190
- "<p style='text-align: center; font-size: 14px; margin: 5px; font-weight: w300;'>" \
191
- "[Note: When running on my Linux cluster, please request a GPU node for optimal performance. " \
192
- "On login nodes, CUDA may not be available.]</p>"
193
-
194
- gradio_inputs = [
195
- gr.Image(label="Input Image", type="filepath"),
196
- gr.Radio(choices=["Swin-L", "DiNAT-L"], value="Swin-L", label="Backbone"),
197
- gr.Radio(
198
- choices=["Luminance (WCAG)", "Hue", "Saturation", "Combined"],
199
- value="Luminance (WCAG)",
200
- label="Contrast Detection Method"
201
- ),
202
- gr.Slider(
203
- minimum=1.0,
204
- maximum=10.0,
205
- value=4.5,
206
- step=0.1,
207
- label="Contrast Threshold (Lower = More Strict)"
208
- )
209
- ]
210
-
211
- gradio_outputs = [
212
- gr.Image(type="pil", label="Segmentation Result"),
213
- gr.Image(type="pil", label="Contrast Analysis"),
214
- gr.Markdown(label="Analysis Results")
215
- ]
216
-
217
- examples = [
218
- ["examples/indoor_room.jpg", "Swin-L", "Luminance (WCAG)", 4.5],
219
- ["examples/living_room.jpg", "DiNAT-L", "Combined", 3.0],
220
- ]
221
-
222
- iface = gr.Interface(
223
- fn=segment_and_analyze_contrast,
224
- inputs=gradio_inputs,
225
- outputs=gradio_outputs,
226
- examples_per_page=5,
227
- allow_flagging="never",
228
- examples=examples,
229
- title=title,
230
- description=description
231
- )
232
-
233
- if __name__ == "__main__":
234
- iface.launch(server_name="0.0.0.0", share=True)
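The detectors imported from `utils/` (e.g. `LuminanceContrastDetector`) are not part of this commit, so their implementation is not shown here. One common way to find the adjacent-segment pairs that the description above refers to is a vectorized boundary pass over the segmentation mask; the sketch below is an assumption about that step, not the actual `utils` code:

```python
import numpy as np

def adjacent_segment_pairs(seg: np.ndarray) -> set:
    """Return unordered (id_a, id_b) pairs of segments that share a boundary."""
    pairs = set()
    # Compare each pixel with its right and bottom neighbour; a mismatch marks
    # a boundary between two different segments, without any Python pixel loop.
    for a, b in ((seg[:, :-1], seg[:, 1:]), (seg[:-1, :], seg[1:, :])):
        edge = a != b
        lo = np.minimum(a[edge], b[edge])
        hi = np.maximum(a[edge], b[edge])
        pairs.update(zip(lo.tolist(), hi.tolist()))
    return pairs
```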