Egrt committed
Commit 424188c · 0 parents (initial commit)
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitignore +18 -0
  2. HEAT.py +460 -0
  3. LICENSE +674 -0
  4. README.md +21 -0
  5. app.py +33 -0
  6. arguments.py +33 -0
  7. assets/img/pipeline.png +0 -0
  8. assets/img/problem_description.png +0 -0
  9. datasets/__init__.py +0 -0
  10. datasets/corners.py +183 -0
  11. datasets/data_utils.py +57 -0
  12. datasets/outdoor_buildings.py +183 -0
  13. datasets/s3d_floorplans.py +187 -0
  14. images/test.jpg +0 -0
  15. infer.py +455 -0
  16. metrics/get_metric.py +219 -0
  17. metrics/new_utils.py +2100 -0
  18. models/__init__.py +0 -0
  19. models/corner_models.py +275 -0
  20. models/corner_to_edge.py +232 -0
  21. models/deformable_transformer.py +236 -0
  22. models/edge_models.py +314 -0
  23. models/loss.py +63 -0
  24. models/mlp.py +21 -0
  25. models/ops/functions/__init__.py +10 -0
  26. models/ops/functions/ms_deform_attn_func.py +61 -0
  27. models/ops/make.sh +10 -0
  28. models/ops/modules/__init__.py +9 -0
  29. models/ops/modules/ms_deform_attn.py +115 -0
  30. models/ops/setup.py +71 -0
  31. models/ops/src/cpu/ms_deform_attn_cpu.cpp +41 -0
  32. models/ops/src/cpu/ms_deform_attn_cpu.h +33 -0
  33. models/ops/src/cuda/ms_deform_attn_cuda.cu +153 -0
  34. models/ops/src/cuda/ms_deform_attn_cuda.h +30 -0
  35. models/ops/src/cuda/ms_deform_im2col_cuda.cuh +1327 -0
  36. models/ops/src/ms_deform_attn.h +62 -0
  37. models/ops/src/vision.cpp +16 -0
  38. models/ops/test.py +89 -0
  39. models/resnet.py +167 -0
  40. models/stacked_hg.py +246 -0
  41. predict.py +33 -0
  42. qualitative_outdoor/generate_html.py +64 -0
  43. qualitative_outdoor/plot_utils.py +43 -0
  44. qualitative_outdoor/visualize_gt.py +46 -0
  45. qualitative_outdoor/visualize_npy.py +46 -0
  46. requirements.txt +27 -0
  47. s3d_floorplan_eval/DataRW/DataRW.py +4 -0
  48. s3d_floorplan_eval/DataRW/S3DRW.py +142 -0
  49. s3d_floorplan_eval/DataRW/wrong_annotatios.py +1 -0
  50. s3d_floorplan_eval/Evaluator/Evaluator.py +457 -0
.gitignore ADDED
@@ -0,0 +1,18 @@
+ ./data
+ .DS_Store
+ viz/*
+ *.tar
+ *.pdf
+ *.zip
+ svg_*
+ *.html
+ models/ops/build
+ models/ops/dist
+ models/ops/*egg-info
+ __pycache__
+ results
+ montefloor_data
+ .idea
+ model_data/checkpoints
+ model_data/heat_checkpoints
+ shpfile/
HEAT.py ADDED
@@ -0,0 +1,460 @@
+ '''
+ Author: [egrt]
+ Date: 2022-08-23 11:44:15
+ LastEditors: Egrt
+ LastEditTime: 2022-11-23 15:25:35
+ Description: HEAT model loading and prediction
+ '''
+ import torch
+ import torch.nn as nn
+ from models.resnet import ResNetBackbone
+ from models.corner_models import HeatCorner
+ from models.edge_models import HeatEdge
+ from models.corner_to_edge import get_infer_edge_pairs
+ from datasets.data_utils import get_pixel_features
+ from huggingface_hub import hf_hub_download
+ from PIL import Image
+ from utils import image_utils
+ from osgeo import gdal, ogr, osr
+ from tqdm import tqdm
+ import os
+ import scipy.ndimage
+ import numpy as np
+ import cv2
+ import skimage
+ 
+ class HEAT(object):
+     #-----------------------------------------#
+     # Note: remember to update model_path
+     #-----------------------------------------#
+     _defaults = {
+         #-----------------------------------------------#
+         # model_data points to the checkpoint of the full network
+         #-----------------------------------------------#
+         "model_data"               : 'model_data/heat_checkpoints/checkpoints/ckpts_heat_outdoor_256/checkpoint.pth',
+         #-----------------------------------------------#
+         # image_size is the pixel size of images fed to the model
+         #-----------------------------------------------#
+         "image_size"               : [256, 256],
+         #-----------------------------------------------#
+         # patch_size is the side length of each image patch
+         #-----------------------------------------------#
+         "patch_size"               : 512,
+         #-----------------------------------------------#
+         # patch_overlap is the overlap between patches, in pixels
+         #-----------------------------------------------#
+         "patch_overlap"            : 0,
+         #-----------------------------------------------#
+         # corner_thresh is the confidence threshold for predicted corners
+         #-----------------------------------------------#
+         "corner_thresh"            : 0.01,
+         #-----------------------------------------------#
+         # maximum number of candidate edges per corner (must not exceed 6)
+         #-----------------------------------------------#
+         "corner_to_edge_multiplier": 3,
+         #-----------------------------------------------#
+         # number of iterations of edge-inference filtering
+         #-----------------------------------------------#
+         "infer_times"              : 3,
+         #-------------------------------#
+         # Whether to use CUDA
+         # Set to False if no GPU is available
+         #-------------------------------#
+         "cuda"                     : False,
+     }
+ 
+     #---------------------------------------------------#
+     # Initialize HEAT
+     #---------------------------------------------------#
+     def __init__(self, **kwargs):
+         self.__dict__.update(self._defaults)
+         for name, value in kwargs.items():
+             setattr(self, name, value)
+         self.generate()
+ 
+     def generate(self):
+         # Download the full network checkpoint from the Hugging Face Hub
+         filepath = hf_hub_download(repo_id="Egrt/HEAT", filename="checkpoint.pth")
+         self.model = torch.load(filepath, map_location='cuda' if self.cuda else 'cpu')
+         # Load the backbone
+         self.backbone = ResNetBackbone()
+         strides = self.backbone.strides
+         num_channels = self.backbone.num_channels
+         self.backbone = nn.DataParallel(self.backbone)
+         if self.cuda:
+             self.backbone = self.backbone.cuda()
+         self.backbone.eval()
+         # Load the corner detection model
+         self.corner_model = HeatCorner(input_dim=128, hidden_dim=256, num_feature_levels=4, backbone_strides=strides,
+                                        backbone_num_channels=num_channels)
+         self.corner_model = nn.DataParallel(self.corner_model)
+         if self.cuda:
+             self.corner_model = self.corner_model.cuda()
+         self.corner_model.eval()
+         # Load the edge detection model
+         self.edge_model = HeatEdge(input_dim=128, hidden_dim=256, num_feature_levels=4, backbone_strides=strides,
+                                    backbone_num_channels=num_channels)
+         self.edge_model = nn.DataParallel(self.edge_model)
+         if self.cuda:
+             self.edge_model = self.edge_model.cuda()
+         self.edge_model.eval()
+         # Load the weights of each sub-model
+         self.backbone.load_state_dict(self.model['backbone'])
+         self.corner_model.load_state_dict(self.model['corner_model'])
+         self.edge_model.load_state_dict(self.model['edge_model'])
+ 
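Since `__init__` copies `_defaults` into the instance dictionary before applying keyword arguments, any of the settings above can be overridden per instance. A minimal sketch with illustrative values:

    # Override any default at construction time (values here are illustrative).
    heat = HEAT(cuda=True, corner_thresh=0.1, patch_size=1024)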
+     def detect_one_image(self, image):
+         #---------------------------------------------------------#
+         # Convert the image to RGB here so a grayscale image does
+         # not crash the prediction. Only RGB input is supported;
+         # every other image type is converted to RGB.
+         #---------------------------------------------------------#
+         image = cvtColor(image)
+         # Decide whether the image needs to be split into patches
+         if image.size[0] < self.patch_size or image.size[1] < self.patch_size:
+             is_slice = False
+         else:
+             is_slice = True
+         if is_slice:
+             # Convert the original image to a numpy array
+             image = np.array(image, dtype=np.uint8)
+             # Keep a copy of the input for visualization
+             viz_image = image.copy()
+             height, width = image.shape[0], image.shape[1]
+             # Scale factor between the patch size and the model input size
+             scale = self.patch_size / self.image_size[0]
+             # Initialize the corner and edge containers
+             pred_corners, pred_confs, pos_edges, edge_confs, c_outputs_np = [], [], [], [], []
+             # Start slicing
+             stride = self.patch_size - self.patch_overlap
+             patch_boundingboxes = image_utils.compute_patch_boundingboxes((height, width),
+                                                                           stride=stride,
+                                                                           patch_res=self.patch_size)
+             edge_len = 0
+             # Predict on every patch
+             for bbox in tqdm(patch_boundingboxes, desc="Predicting with patches", leave=False):
+                 # Crop the patch
+                 crop_image = image[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
+                 # Convert the numpy array back to a PIL Image
+                 crop_image = Image.fromarray(crop_image)
+                 try:
+                     pred_corners, pred_confs, pos_edges, edge_confs, c_outputs_np, _ = self.predict_no_patching(crop_image)
+                 except RuntimeError as e:
+                     print("ERROR: " + str(e))
+                     print("INFO: reduce patch_size until it fits in memory")
+                     raise e
+                 # Map patch-local corner coordinates back into the full image
+                 pred_corners[:, 0] = pred_corners[:, 0] * scale + bbox[0]
+                 pred_corners[:, 1] = pred_corners[:, 1] * scale + bbox[1]
+                 pred_corners_viz = pred_corners
+                 viz_image = visualize_cond_generation(pred_corners_viz, pred_confs, viz_image, edges=pos_edges,
+                                                       edge_confs=edge_confs, shpfile=False)
+ 
+             hr_image = Image.fromarray(np.uint8(viz_image))
+         else:
+             pred_corners, pred_confs, pos_edges, edge_confs, c_outputs_np, viz_image = self.predict_no_patching(image)
+             #---------------------------------------------------------#
+             # Inference ends here; draw the corners and edges onto the
+             # original image from the predicted corner coordinates.
+             #---------------------------------------------------------#
+             pred_corners_viz = pred_corners
+             image_result = visualize_cond_generation(pred_corners_viz, pred_confs, viz_image, edges=pos_edges,
+                                                      edge_confs=edge_confs, shpfile=True)
+             hr_image = Image.fromarray(np.uint8(image_result))
+         return hr_image
+ 
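To make the patch-to-image coordinate mapping above concrete: with the defaults patch_size=512 and image_size=[256, 256], scale is 512 / 256 = 2, so a corner predicted at (100, 50) inside a patch whose bounding box starts at (512, 0) lands at (712, 100) in the full image. A sketch of that arithmetic (the bbox values are hypothetical):

    patch_size, model_input = 512, 256
    scale = patch_size / model_input          # 2.0 with the defaults
    bbox = (512, 0, 1024, 512)                # hypothetical patch (x0, y0, x1, y1)
    corner_in_patch = (100, 50)               # predicted at model resolution
    x = corner_in_patch[0] * scale + bbox[0]  # 712.0
    y = corner_in_patch[1] * scale + bbox[1]  # 100.0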
+     #---------------------------------------------------------#
+     # Predict an image without patching.
+     # Returns the predicted corner coordinates and edges.
+     #---------------------------------------------------------#
+     def predict_no_patching(self, image):
+         image = image.resize(tuple(self.image_size), Image.BICUBIC)
+         # Convert the PIL Image to numpy
+         image = np.array(image, dtype=np.uint8)
+         # Keep a copy of the input for visualization
+         viz_image = image.copy()
+         # preprocess image numpy->tensor
+         image = process_image(image)
+         # Positional encodings for every pixel; the default image size is 256
+         pixels, pixel_features = get_pixel_features(image_size=self.image_size[0])
+         # Run the models
+         with torch.no_grad():
+             image_feats, feat_mask, all_image_feats = self.backbone(image)
+             pixel_features = pixel_features.unsqueeze(0).repeat(image.shape[0], 1, 1, 1)
+             preds_s1 = self.corner_model(image_feats, feat_mask, pixel_features, pixels, all_image_feats)
+ 
+             c_outputs = preds_s1
+             # Predicted corner heatmap
+             c_outputs_np = c_outputs[0].detach().cpu().numpy()
+             # Keep the coordinates of corners scoring above the threshold
+             pos_indices = np.where(c_outputs_np >= self.corner_thresh)
+             pred_corners = pixels[pos_indices]
+             # Confidence of each predicted corner
+             pred_confs = c_outputs_np[pos_indices]
+             # Non-maximum suppression based on corner confidence
+             pred_corners, pred_confs = corner_nms(pred_corners, pred_confs, image_size=c_outputs.shape[1])
+             # Enumerate all pairs of corners to build the candidate edges
+             pred_corners, pred_confs, edge_coords, edge_mask, edge_ids = get_infer_edge_pairs(pred_corners, pred_confs)
+             # Number of corners
+             corner_nums = torch.tensor([len(pred_corners)]).to(image.device)
+             max_candidates = torch.stack([corner_nums.max() * self.corner_to_edge_multiplier] * len(corner_nums), dim=0)
+             # Unordered set of unique positive edge ids
+             all_pos_ids = set()
+             # Confidence of each accepted edge
+             all_edge_confs = dict()
+             # Iterate the edge inference (3 times by default)
+             for tt in range(self.infer_times):
+                 if tt == 0:
+                     # gt_values has the same shape as the edge mask, initialized to 0
+                     gt_values = torch.zeros_like(edge_mask).long()
+                     # then every entry is set to 2 (undecided)
+                     gt_values[:, :] = 2
+ 
+                 # Predict the edges
+                 s1_logits, s2_logits_hb, s2_logits_rel, selected_ids, s2_mask, s2_gt_values = self.edge_model(image_feats,
+                     feat_mask, pixel_features, edge_coords, edge_mask, gt_values, corner_nums, max_candidates, True)
+                 num_total = s1_logits.shape[2]
+                 num_selected = selected_ids.shape[1]
+                 num_filtered = num_total - num_selected
+                 # Turn the logits into probability distributions over (0, 1)
+                 s1_preds = s1_logits.squeeze().softmax(0)
+                 s2_preds_rel = s2_logits_rel.squeeze().softmax(0)
+                 s2_preds_hb = s2_logits_hb.squeeze().softmax(0)
+                 s1_preds_np = s1_preds[1, :].detach().cpu().numpy()
+                 s2_preds_rel_np = s2_preds_rel[1, :].detach().cpu().numpy()
+                 s2_preds_hb_np = s2_preds_hb[1, :].detach().cpu().numpy()
+ 
+                 selected_ids = selected_ids.squeeze().detach().cpu().numpy()
+                 # Filter: scores in (0.9, 1) become True, (0.01, 0.9) stay Undecided, (0, 0.01) become False
+                 if tt != self.infer_times - 1:
+                     s2_preds_np = s2_preds_hb_np
+ 
+                     pos_edge_ids = np.where(s2_preds_np >= 0.9)
+                     neg_edge_ids = np.where(s2_preds_np <= 0.01)
+                     for pos_id in pos_edge_ids[0]:
+                         actual_id = selected_ids[pos_id]
+                         if gt_values[0, actual_id] != 2:
+                             continue
+                         all_pos_ids.add(actual_id)
+                         all_edge_confs[actual_id] = s2_preds_np[pos_id]
+                         gt_values[0, actual_id] = 1
+                     for neg_id in neg_edge_ids[0]:
+                         actual_id = selected_ids[neg_id]
+                         if gt_values[0, actual_id] != 2:
+                             continue
+                         gt_values[0, actual_id] = 0
+                     num_to_pred = (gt_values == 2).sum()
+                     if num_to_pred <= num_filtered:
+                         break
+                 else:
+                     s2_preds_np = s2_preds_hb_np
+ 
+                     pos_edge_ids = np.where(s2_preds_np >= 0.5)
+                     for pos_id in pos_edge_ids[0]:
+                         actual_id = selected_ids[pos_id]
+                         if bool(s2_mask[0][pos_id]) or gt_values[0, actual_id] != 2:
+                             continue
+                         all_pos_ids.add(actual_id)
+                         all_edge_confs[actual_id] = s2_preds_np[pos_id]
+             pos_edge_ids = list(all_pos_ids)
+             edge_confs = [all_edge_confs[idx] for idx in pos_edge_ids]
+             pos_edges = edge_ids[pos_edge_ids].cpu().numpy()
+             edge_confs = np.array(edge_confs)
+ 
+         if self.image_size[0] != 256:
+             pred_corners = pred_corners / (self.image_size[0] / 256)
+ 
+         return pred_corners, pred_confs, pos_edges, edge_confs, c_outputs_np, viz_image
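The inference loop above is a three-state labelling scheme: every candidate edge starts as undecided (2); on each pass, edges scored above 0.9 are frozen as positive (1) and edges below 0.01 as negative (0), and only still-undecided edges are re-scored, with a relaxed 0.5 threshold on the final pass. A toy numpy sketch of the thresholding rule (the scores are made up):

    import numpy as np

    scores = np.array([0.95, 0.5, 0.005, 0.92])  # made-up per-edge probabilities
    labels = np.full(scores.shape, 2)            # 2 = undecided
    labels[scores >= 0.9] = 1                    # confident positives are frozen
    labels[scores <= 0.01] = 0                   # confident negatives are frozen
    # labels -> [1, 2, 0, 1]; only index 1 is re-scored on the next pass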
+ #---------------------------------------------------------#
+ # Convert the image to RGB so a grayscale image does not
+ # crash the prediction. Only RGB input is supported; every
+ # other image type is converted to RGB.
+ #---------------------------------------------------------#
+ def cvtColor(image):
+     if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
+         return image
+     else:
+         image = image.convert('RGB')
+         return image
+ 
+ #---------------------------------------------------------#
+ # Rank the corners by confidence and keep only those that
+ # are local confidence maxima (non-maximum suppression)
+ #---------------------------------------------------------#
+ def corner_nms(preds, confs, image_size):
+     data = np.zeros([image_size, image_size])
+     neighborhood_size = 5
+     threshold = 0
+ 
+     for i in range(len(preds)):
+         data[preds[i, 1], preds[i, 0]] = confs[i]
+ 
+     data_max = scipy.ndimage.maximum_filter(data, neighborhood_size)
+     maxima = (data == data_max)
+     data_min = scipy.ndimage.minimum_filter(data, neighborhood_size)
+     diff = ((data_max - data_min) > threshold)
+     maxima[diff == 0] = 0
+ 
+     results = np.where(maxima > 0)
+     filtered_preds = np.stack([results[1], results[0]], axis=-1)
+ 
+     new_confs = list()
+     for i, pred in enumerate(filtered_preds):
+         new_confs.append(data[pred[1], pred[0]])
+     new_confs = np.array(new_confs)
+ 
+     return filtered_preds, new_confs
+ 
+ def process_image(img):
+     mean = [0.485, 0.456, 0.406]
+     std = [0.229, 0.224, 0.225]
+     img = skimage.img_as_float(img)
+     img = img.transpose((2, 0, 1))
+     img = (img - np.array(mean)[:, np.newaxis, np.newaxis]) / np.array(std)[:, np.newaxis, np.newaxis]
+     img = torch.Tensor(img)
+     if torch.cuda.is_available():
+         img = img.cuda()
+     img = img.unsqueeze(0)
+     return img
+ 
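corner_nms above rasterizes the corner confidences onto an image-sized grid and keeps only the corners that are local maxima within a 5 x 5 neighborhood. A toy call with made-up inputs:

    import numpy as np

    # Two corners 2 px apart: only the higher-confidence one survives NMS.
    preds = np.array([[10, 10], [12, 10]])  # (x, y) integer coordinates
    confs = np.array([0.8, 0.6])
    kept, kept_confs = corner_nms(preds, confs, image_size=256)
    # kept -> [[10, 10]], kept_confs -> [0.8]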
+ def postprocess_preds(corners, confs, edges):
+     corner_degrees = dict()
+     for edge_i, edge_pair in enumerate(edges):
+         corner_degrees[edge_pair[0]] = corner_degrees.setdefault(edge_pair[0], 0) + 1
+         corner_degrees[edge_pair[1]] = corner_degrees.setdefault(edge_pair[1], 0) + 1
+     good_ids = [i for i in range(len(corners)) if i in corner_degrees]
+     if len(good_ids) == len(corners):
+         return corners, confs, edges
+     else:
+         good_corners = corners[good_ids]
+         good_confs = confs[good_ids]
+         id_mapping = {value: idx for idx, value in enumerate(good_ids)}
+         new_edges = list()
+         for edge_pair in edges:
+             new_pair = (id_mapping[edge_pair[0]], id_mapping[edge_pair[1]])
+             new_edges.append(new_pair)
+         new_edges = np.array(new_edges)
+         return good_corners, good_confs, new_edges
+ 
+ #---------------------------------------------------------#
+ # Draw the predicted corners and edges onto the input image.
+ # Unlike the original code, the image object is returned
+ # directly instead of being saved to a fixed path.
+ #---------------------------------------------------------#
+ def visualize_cond_generation(positive_pixels, confs, image, gt_corners=None, prec=None, recall=None,
+                               image_masks=None, edges=None, edge_confs=None, shpfile=False):
+     # Copy the input image
+     image = image.copy()
+     if confs is not None:
+         viz_confs = confs
+ 
+     if edges is not None:
+         preds = positive_pixels.astype(int)
+         c_degrees = dict()
+         for edge_i, edge_pair in enumerate(edges):
+             conf = (edge_confs[edge_i] * 2) - 1
+             cv2.line(image, tuple(preds[edge_pair[0]]), tuple(preds[edge_pair[1]]), (255 * conf, 255 * conf, 0), 2)
+             c_degrees[edge_pair[0]] = c_degrees.setdefault(edge_pair[0], 0) + 1
+             c_degrees[edge_pair[1]] = c_degrees.setdefault(edge_pair[1], 0) + 1
+ 
+     for idx, c in enumerate(positive_pixels):
+         if edges is not None and idx not in c_degrees:
+             continue
+         if confs is None:
+             cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 0, 255), -1)
+         else:
+             cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 0, 255 * viz_confs[idx]), -1)
+         # if edges is not None:
+         #     cv2.putText(image, '{}'.format(c_degrees[idx]), (int(c[0]), int(c[1] - 5)), cv2.FONT_HERSHEY_SIMPLEX,
+         #                 0.5, (255, 0, 0), 1, cv2.LINE_AA)
+ 
+     if gt_corners is not None:
+         for c in gt_corners:
+             cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 255, 0), -1)
+ 
+     if image_masks is not None:
+         mask_ids = np.where(image_masks == 1)[0]
+         for mask_id in mask_ids:
+             y_idx = mask_id // 64
+             x_idx = (mask_id - y_idx * 64)
+             x_coord = x_idx * 4
+             y_coord = y_idx * 4
+             cv2.rectangle(image, (x_coord, y_coord), (x_coord + 3, y_coord + 3), (127, 127, 0), thickness=-1)
+ 
+     # if confs is not None:
+     #     cv2.putText(image, 'max conf: {:.3f}'.format(confs.max()), (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
+     #                 0.5, (255, 255, 0), 1, cv2.LINE_AA)
+     if prec is not None:
+         if isinstance(prec, tuple):
+             cv2.putText(image, 'edge p={:.2f}, edge r={:.2f}'.format(prec[0], recall[0]), (20, 20),
+                         cv2.FONT_HERSHEY_SIMPLEX,
+                         0.5, (255, 255, 0), 1, cv2.LINE_AA)
+             cv2.putText(image, 'region p={:.2f}, region r={:.2f}'.format(prec[1], recall[1]), (20, 40),
+                         cv2.FONT_HERSHEY_SIMPLEX,
+                         0.5, (255, 255, 0), 1, cv2.LINE_AA)
+         else:
+             cv2.putText(image, 'prec={:.2f}, recall={:.2f}'.format(prec, recall), (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
+                         0.5, (255, 255, 0), 1, cv2.LINE_AA)
+ 
+     # Optionally write a shapefile
+     if shpfile:
+         preds = positive_pixels.astype(int)
+         # Build the list of line segments
+         Polyline = []
+         for edge_i, edge_pair in enumerate(edges):
+             Polyline.append([preds[edge_pair[0]], preds[edge_pair[1]]])
+         Polyline = np.array(Polyline, dtype=np.int32)
+         # Write the shapefile
+         writeShp(save_file_dir="shpfile", Polyline=Polyline)
+ 
+     return image
+ 
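writeShp below serializes all predicted edges into a single MULTILINESTRING geometry in WKT, one `(x0 y0,x1 y1)` pair per segment. For two hypothetical segments the string it builds looks like this:

    # Sketch of the WKT string writeShp assembles (segments are hypothetical).
    segments = [((0, 0), (10, 0)), ((10, 0), (10, 5))]
    wkt = 'MULTILINESTRING({})'.format(
        ','.join('({} {},{} {})'.format(x0, y0, x1, y1)
                 for (x0, y0), (x1, y1) in segments))
    # -> 'MULTILINESTRING((0 0,10 0),(10 0,10 5))'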
+ def writeShp(save_file_dir="shpfile", Polyline=None):
+     # Create the output directory
+     if not os.path.exists(save_file_dir):
+         os.makedirs(save_file_dir)
+     # Support non-ASCII (e.g. Chinese) file paths
+     gdal.SetConfigOption("GDAL_FILENAME_IS_UTF8", "YES")
+     # Support non-ASCII characters in attribute-table fields
+     gdal.SetConfigOption("SHAPE_ENCODING", "UTF-8")
+     # Register all OGR drivers
+     ogr.RegisterAll()
+     # Create the shapefile
+     strDriverName = "ESRI Shapefile"
+     oDriver = ogr.GetDriverByName(strDriverName)
+     if oDriver is None:
+         return "Driver not available: " + strDriverName
+     # Create the data source
+     file_path = os.path.join(save_file_dir, "result.shp")
+     oDS = oDriver.CreateDataSource(file_path)
+     if oDS is None:
+         return "Failed to create file: result.shp"
+     if Polyline is not None:
+         # Create a layer with the WGS84 coordinate system
+         papszLCO = []
+         geosrs = osr.SpatialReference()
+         geosrs.SetWellKnownGeogCS("WGS84")
+         # Lines:  ogr_type = ogr.wkbLineString
+         # Points: ogr_type = ogr.wkbPoint
+         ogr_type = ogr.wkbMultiLineString
+         # Areas use Polygon, lines use Polyline, points use Point
+         oLayer = oDS.CreateLayer("Polyline", geosrs, ogr_type, papszLCO)
+         if oLayer is None:
+             return "Failed to create the layer!"
+         # Create the attribute table
+         # Create the id field
+         oId = ogr.FieldDefn("id", ogr.OFTInteger)
+         oLayer.CreateField(oId, 1)
+         # Create the name field
+         oName = ogr.FieldDefn("name", ogr.OFTString)
+         oLayer.CreateField(oName, 1)
+         oDefn = oLayer.GetLayerDefn()
+         # Create the features
+         # Dataset rows: wkt_geom; id; name
+         point_str_list = ['({} {},{} {})'.format(row[0, 0], row[0, 1], row[1, 0], row[1, 1]) for row in Polyline]
+         Polyline_Wkt = ','.join(point_str_list)
+         features = ['Polyline0;MULTILINESTRING({})'.format(Polyline_Wkt)]
+         for index, f in enumerate(features):
+             oFeaturePolygon = ogr.Feature(oDefn)
+             oFeaturePolygon.SetField("id", index)
+             oFeaturePolygon.SetField("name", f.split(";")[0])
+             geomPolygon = ogr.CreateGeometryFromWkt(f.split(";")[1])
+             oFeaturePolygon.SetGeometry(geomPolygon)
+             oLayer.CreateFeature(oFeaturePolygon)
+     # Close the data source when done
+     oDS.Destroy()
+     return "Dataset created successfully!"
LICENSE ADDED
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+ software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+ to take away your freedom to share and change the works. By contrast,
+ the GNU General Public License is intended to guarantee your freedom to
+ share and change all versions of a program--to make sure it remains free
+ software for all its users. We, the Free Software Foundation, use the
+ GNU General Public License for most of our software; it applies also to
+ any other work released this way by its authors. You can apply it to
+ your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+ price. Our General Public Licenses are designed to make sure that you
+ have the freedom to distribute copies of free software (and charge for
+ them if you wish), that you receive source code or can get it if you
+ want it, that you can change the software or use pieces of it in new
+ free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+ these rights or asking you to surrender the rights. Therefore, you have
+ certain responsibilities if you distribute copies of the software, or if
+ you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+ gratis or for a fee, you must pass on to the recipients the same
+ freedoms that you received. You must make sure that they, too, receive
+ or can get the source code. And you must show them these terms so they
+ know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+ (1) assert copyright on the software, and (2) offer you this License
+ giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+ that there is no warranty for this free software. For both users' and
+ authors' sake, the GPL requires that modified versions be marked as
+ changed, so that their problems will not be attributed erroneously to
+ authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+ modified versions of the software inside them, although the manufacturer
+ can do so. This is fundamentally incompatible with the aim of
+ protecting users' freedom to change the software. The systematic
+ pattern of such abuse occurs in the area of products for individuals to
+ use, which is precisely where it is most unacceptable. Therefore, we
+ have designed this version of the GPL to prohibit the practice for those
+ products. If such problems arise substantially in other domains, we
+ stand ready to extend this provision to those domains in future versions
+ of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+ States should not allow patents to restrict development and use of
+ software on general-purpose computers, but in those that do, we wish to
+ avoid the special danger that patents applied to a free program could
+ make it effectively proprietary. To prevent this, the GPL assures that
+ patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+ modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+ works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+ License. Each licensee is addressed as "you". "Licensees" and
+ "recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+ in a fashion requiring copyright permission, other than the making of an
+ exact copy. The resulting work is called a "modified version" of the
+ earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+ on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+ permission, would make you directly or secondarily liable for
+ infringement under applicable copyright law, except executing it on a
+ computer or modifying a private copy. Propagation includes copying,
+ distribution (with or without modification), making available to the
+ public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+ parties to make or receive copies. Mere interaction with a user through
+ a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+ to the extent that it includes a convenient and prominently visible
+ feature that (1) displays an appropriate copyright notice, and (2)
+ tells the user that there is no warranty for the work (except to the
+ extent that warranties are provided), that licensees may convey the
+ work under this License, and how to view a copy of this License. If
+ the interface presents a list of user commands or options, such as a
+ menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+ for making modifications to it. "Object code" means any non-source
+ form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+ standard defined by a recognized standards body, or, in the case of
+ interfaces specified for a particular programming language, one that
+ is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+ than the work as a whole, that (a) is included in the normal form of
+ packaging a Major Component, but which is not part of that Major
+ Component, and (b) serves only to enable use of the work with that
+ Major Component, or to implement a Standard Interface for which an
+ implementation is available to the public in source code form. A
+ "Major Component", in this context, means a major essential component
+ (kernel, window system, and so on) of the specific operating system
+ (if any) on which the executable work runs, or a compiler used to
+ produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+ the source code needed to generate, install, and (for an executable
+ work) run the object code and to modify the work, including scripts to
+ control those activities. However, it does not include the work's
+ System Libraries, or general-purpose tools or generally available free
+ programs which are used unmodified in performing those activities but
+ which are not part of the work. For example, Corresponding Source
+ includes interface definition files associated with source files for
+ the work, and the source code for shared libraries and dynamically
+ linked subprograms that the work is specifically designed to require,
+ such as by intimate data communication or control flow between those
+ subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+ can regenerate automatically from other parts of the Corresponding
+ Source.
+
+ The Corresponding Source for a work in source code form is that
+ same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+ copyright on the Program, and are irrevocable provided the stated
+ conditions are met. This License explicitly affirms your unlimited
+ permission to run the unmodified Program. The output from running a
+ covered work is covered by this License only if the output, given its
+ content, constitutes a covered work. This License acknowledges your
+ rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+ convey, without conditions so long as your license otherwise remains
+ in force. You may convey covered works to others for the sole purpose
+ of having them make modifications exclusively for you, or provide you
+ with facilities for running those works, provided that you comply with
+ the terms of this License in conveying all material for which you do
+ not control copyright. Those thus making or running the covered works
+ for you must do so exclusively on your behalf, under your direction
+ and control, on terms that prohibit them from making any copies of
+ your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+ the conditions stated below. Sublicensing is not allowed; section 10
+ makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+ measure under any applicable law fulfilling obligations under article
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
+ similar laws prohibiting or restricting circumvention of such
+ measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+ circumvention of technological measures to the extent such circumvention
+ is effected by exercising rights under this License with respect to
+ the covered work, and you disclaim any intention to limit operation or
+ modification of the work as a means of enforcing, against the work's
+ users, your or third parties' legal rights to forbid circumvention of
+ technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+ receive it, in any medium, provided that you conspicuously and
+ appropriately publish on each copy an appropriate copyright notice;
+ keep intact all notices stating that this License and any
+ non-permissive terms added in accord with section 7 apply to the code;
+ keep intact all notices of the absence of any warranty; and give all
+ recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+ and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+ produce it from the Program, in the form of source code under the
+ terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+ works, which are not by their nature extensions of the covered work,
+ and which are not combined with it such as to form a larger program,
+ in or on a volume of a storage or distribution medium, is called an
+ "aggregate" if the compilation and its resulting copyright are not
+ used to limit the access or legal rights of the compilation's users
+ beyond what the individual works permit. Inclusion of a covered work
+ in an aggregate does not cause this License to apply to the other
+ parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+ of sections 4 and 5, provided that you also convey the
+ machine-readable Corresponding Source under the terms of this License,
+ in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+ from the Corresponding Source as a System Library, need not be
+ included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+ tangible personal property which is normally used for personal, family,
+ or household purposes, or (2) anything designed or sold for incorporation
+ into a dwelling. In determining whether a product is a consumer product,
+ doubtful cases shall be resolved in favor of coverage. For a particular
+ product received by a particular user, "normally used" refers to a
+ typical or common use of that class of product, regardless of the status
+ of the particular user or of the way in which the particular user
+ actually uses, or expects or is expected to use, the product. A product
+ is a consumer product regardless of whether the product has substantial
+ commercial, industrial or non-consumer uses, unless such uses represent
+ the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+ procedures, authorization keys, or other information required to install
+ and execute modified versions of a covered work in that User Product from
+ a modified version of its Corresponding Source. The information must
+ suffice to ensure that the continued functioning of the modified object
+ code is in no case prevented or interfered with solely because
+ modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+ specifically for use in, a User Product, and the conveying occurs as
+ part of a transaction in which the right of possession and use of the
+ User Product is transferred to the recipient in perpetuity or for a
+ fixed term (regardless of how the transaction is characterized), the
+ Corresponding Source conveyed under this section must be accompanied
+ by the Installation Information. But this requirement does not apply
+ if neither you nor any third party retains the ability to install
+ modified object code on the User Product (for example, the work has
+ been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+ requirement to continue to provide support service, warranty, or updates
+ for a work that has been modified or installed by the recipient, or for
+ the User Product in which it has been modified or installed. Access to a
+ network may be denied when the modification itself materially and
+ adversely affects the operation of the network or violates the rules and
+ protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+ in accord with this section must be in a format that is publicly
+ documented (and with an implementation available to the public in
+ source code form), and must require no special password or key for
+ unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+ License by making exceptions from one or more of its conditions.
+ Additional permissions that are applicable to the entire Program shall
+ be treated as though they were included in this License, to the extent
+ that they are valid under applicable law. If additional permissions
+ apply only to part of the Program, that part may be used separately
+ under those permissions, but the entire Program remains governed by
+ this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+ remove any additional permissions from that copy, or from any part of
+ it. (Additional permissions may be written to require their own
+ removal in certain cases when you modify the work.) You may place
+ additional permissions on material, added by you to a covered work,
+ for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+ add to a covered work, you may (if authorized by the copyright holders of
+ that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+ restrictions" within the meaning of section 10. If the Program as you
+ received it, or any part of it, contains a notice stating that it is
+ governed by this License along with a term that is a further
+ restriction, you may remove that term. If a license document contains
+ a further restriction but permits relicensing or conveying under this
+ License, you may add to a covered work material governed by the terms
+ of that license document, provided that the further restriction does
+ not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+ must place, in the relevant source files, a statement of the
+ additional terms that apply to those files, or a notice indicating
+ where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+ form of a separately written license, or stated as exceptions;
+ the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+ provided under this License. Any attempt otherwise to propagate or
+ modify it is void, and will automatically terminate your rights under
+ this License (including any patent licenses granted under the third
+ paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+ license from a particular copyright holder is reinstated (a)
+ provisionally, unless and until the copyright holder explicitly and
+ finally terminates your license, and (b) permanently, if the copyright
+ holder fails to notify you of the violation by some reasonable means
+ prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+ reinstated permanently if the copyright holder notifies you of the
+ violation by some reasonable means, this is the first time you have
+ received notice of violation of this License (for any work) from that
+ copyright holder, and you cure the violation prior to 30 days after
+ your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+ licenses of parties who have received copies or rights from you under
+ this License. If your rights have been terminated and not permanently
+ reinstated, you do not qualify to receive new licenses for the same
+ material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+ run a copy of the Program. Ancillary propagation of a covered work
+ occurring solely as a consequence of using peer-to-peer transmission
+ to receive a copy likewise does not require acceptance. However,
+ nothing other than this License grants you permission to propagate or
+ modify any covered work. These actions infringe copyright if you do
+ not accept this License. Therefore, by modifying or propagating a
+ covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+ receives a license from the original licensors, to run, modify and
+ propagate that work, subject to this License. You are not responsible
+ for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+ organization, or substantially all assets of one, or subdividing an
+ organization, or merging organizations. If propagation of a covered
+ work results from an entity transaction, each party to that
+ transaction who receives a copy of the work also receives whatever
+ licenses to the work the party's predecessor in interest had or could
+ give under the previous paragraph, plus a right to possession of the
+ Corresponding Source of the work from the predecessor in interest, if
+ the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+ rights granted or affirmed under this License. For example, you may
+ not impose a license fee, royalty, or other charge for exercise of
+ rights granted under this License, and you may not initiate litigation
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
+ any patent claim is infringed by making, using, selling, offering for
+ sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+ License of the Program or a work on which the Program is based. The
+ work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+ owned or controlled by the contributor, whether already acquired or
+ hereafter acquired, that would be infringed by some manner, permitted
+ by this License, of making, using, or selling its contributor version,
+ but do not include claims that would be infringed only as a
+ consequence of further modification of the contributor version. For
+ purposes of this definition, "control" includes the right to grant
+ patent sublicenses in a manner consistent with the requirements of
+ this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+ patent license under the contributor's essential patent claims, to
+ make, use, sell, offer for sale, import and otherwise run, modify and
+ propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+ agreement or commitment, however denominated, not to enforce a patent
+ (such as an express permission to practice a patent or covenant not to
+ sue for patent infringement). To "grant" such a patent license to a
+ party means to make such an agreement or commitment not to enforce a
+ patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+ and the Corresponding Source of the work is not available for anyone
+ to copy, free of charge and under the terms of this License, through a
+ publicly available network server or other readily accessible means,
+ then you must either (1) cause the Corresponding Source to be so
+ available, or (2) arrange to deprive yourself of the benefit of the
+ patent license for this particular work, or (3) arrange, in a manner
+ consistent with the requirements of this License, to extend the patent
+ license to downstream recipients. "Knowingly relying" means you have
+ actual knowledge that, but for the patent license, your conveying the
+ covered work in a country, or your recipient's use of the covered work
+ in a country, would infringe one or more identifiable patents in that
+ country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+ arrangement, you convey, or propagate by procuring conveyance of, a
+ covered work, and grant a patent license to some of the parties
+ receiving the covered work authorizing them to use, propagate, modify
+ or convey a specific copy of the covered work, then the patent license
+ you grant is automatically extended to all recipients of the covered
+ work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+ the scope of its coverage, prohibits the exercise of, or is
+ conditioned on the non-exercise of one or more of the rights that are
+ specifically granted under this License. You may not convey a covered
+ work if you are a party to an arrangement with a third party that is
+ in the business of distributing software, under which you make payment
+ to the third party based on the extent of your activity of conveying
+ the work, and under which the third party grants, to any of the
+ parties who would receive the covered work from you, a discriminatory
+ patent license (a) in connection with copies of the covered work
+ conveyed by you (or copies made from those copies), or (b) primarily
+ for and in connection with specific products or compilations that
+ contain the covered work, unless you entered into that arrangement,
+ or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+ any implied license or other defenses to infringement that may
+ otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot convey a
+ covered work so as to satisfy simultaneously your obligations under this
+ License and any other pertinent obligations, then as a consequence you may
+ not convey it at all. For example, if you agree to terms that obligate you
+ to collect a royalty for further conveying from those to whom you convey
+ the Program, the only way you could satisfy both those terms and this
+ License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+ permission to link or combine any covered work with a work licensed
+ under version 3 of the GNU Affero General Public License into a single
+ combined work, and to convey the resulting work. The terms of this
+ License will continue to apply to the part which is the covered work,
+ but the special requirements of the GNU Affero General Public License,
+ section 13, concerning interaction through a network will apply to the
+ combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+ the GNU General Public License from time to time. Such new versions will
+ be similar in spirit to the present version, but may differ in detail to
+ address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+ Program specifies that a certain numbered version of the GNU General
+ Public License "or any later version" applies to it, you have the
+ option of following the terms and conditions either of that numbered
+ version or of any later version published by the Free Software
+ Foundation. If the Program does not specify a version number of the
+ GNU General Public License, you may choose any version ever published
+ by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+ versions of the GNU General Public License can be used, that proxy's
+ public statement of acceptance of a version permanently authorizes you
+ to choose that version for the Program.
+
+ Later license versions may give you additional or different
+ permissions. However, no additional obligations are imposed on any
+ author or copyright holder as a result of your choosing to follow a
+ later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+ above cannot be given local legal effect according to their terms,
+ reviewing courts shall apply local law that most closely approximates
+ an absolute waiver of all civil liability in connection with the
+ Program, unless a warranty or assumption of liability accompanies a
+ copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+ possible use to the public, the best way to achieve this is to make it
+ free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+ to attach them to the start of each source file to most effectively
+ state the exclusion of warranty; and each file should have at least
+ the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+ notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+ The hypothetical commands `show w' and `show c' should show the appropriate
+ parts of the General Public License. Of course, your program's commands
662
+ might be different; for a GUI interface, you would use an "about box".
663
+
664
+ You should also get your employer (if you work as a programmer) or school,
665
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
666
+ For more information on this, and how to apply and follow the GNU GPL, see
667
+ <https://www.gnu.org/licenses/>.
668
+
669
+ The GNU General Public License does not permit incorporating your program
670
+ into proprietary programs. If your program is a subroutine library, you
671
+ may consider it more useful to permit linking proprietary applications with
672
+ the library. If this is what you want to do, use the GNU Lesser General
673
+ Public License instead of this License. But first, please read
674
+ <https://www.gnu.org/licenses/why-not-lgpl.html>.
README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!--
2
+ * @Description:
3
+ * @Author: Egrt
4
+ * @Date: 2022-11-23 15:20:00
5
+ * @LastEditors: Egrt
6
+ * @LastEditTime: 2022-11-23 15:27:41
7
+ -->
8
+ ---
9
+ title: HEAT
10
+ emoji: 📈
11
+ colorFrom: indigo
12
+ colorTo: yellow
13
+ sdk: gradio
14
+ sdk_version: 3.11.0
15
+ app_file: app.py
16
+ pinned: false
17
+ license: apache-2.0
18
+ ---
19
+
20
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
21
+
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author: Egrt
3
+ Date: 2022-01-13 13:34:10
4
+ LastEditors: [egrt]
5
+ LastEditTime: 2022-08-15 19:40:32
6
+ FilePath: \HEAT\app.py
7
+ '''
8
+ from HEAT import HEAT
9
+ import gradio as gr
10
+ import os
11
+ heat = HEAT()
12
+
13
+ # --------模型推理---------- #
14
+ def inference(img):
15
+ image_result = heat.detect_one_image(img)
16
+ return image_result
17
+
18
+ # --------网页信息---------- #
19
+ title = "HEAT"
20
+ description = "HEAT: Holistic Edge Attention Transformer for Structured Reconstruction @Luuuu"
21
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2108.10257' target='_blank'>HEAT: Holistic Edge Attention Transformer for Structured Reconstruction </a> | <a href='https://github.com/JingyunLiang/SwinIR' target='_blank'>Github Repo</a></p>"
22
+ example_img_dir = 'images/'
23
+ example_img_name = os.listdir(example_img_dir)
24
+ examples=[[os.path.join(example_img_dir, image_path)] for image_path in example_img_name if image_path.endswith(('.jpg','.jpeg', '.png'))]
25
+ gr.Interface(
26
+ inference,
27
+ [gr.Image(type="pil", label="Input")],
28
+ gr.Image(type="pil", label="Output"),
29
+ title=title,
30
+ description=description,
31
+ article=article,
32
+ examples=examples
33
+ ).launch()
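
A quick way to sanity-check the handler above without launching the web UI is to call the same function directly; a minimal sketch, assuming detect_one_image accepts and returns a PIL image (which is what the Gradio input/output types imply):

# Sketch: exercise the Gradio handler locally (not part of the commit).
from PIL import Image
from HEAT import HEAT

heat = HEAT()
img = Image.open('images/test.jpg')   # sample image shipped with this commit
result = heat.detect_one_image(img)   # same call the inference() handler makes
result.save('test_result.png')        # assumes a PIL image is returned, per the output type above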
arguments.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+
4
+ def get_args_parser():
5
+ parser = argparse.ArgumentParser('Holistic edge attention transformer', add_help=False)
6
+ parser.add_argument('--exp_dataset', default='outdoor',
7
+ help='the dataset for experiments, outdoor/s3d_floorplan')
8
+ parser.add_argument('--lr', default=2e-4, type=float)
9
+ parser.add_argument('--batch_size', default=16, type=int)
10
+ parser.add_argument('--weight_decay', default=1e-5, type=float)
11
+ parser.add_argument('--epochs', default=800, type=int)
12
+ parser.add_argument('--lr_drop', default=600, type=int)
13
+ parser.add_argument('--clip_max_norm', default=0.1, type=float,
14
+ help='gradient clipping max norm')
15
+ parser.add_argument('--print_freq', default=40, type=int)
16
+ parser.add_argument('--output_dir', default='./checkpoints/ckpts_heat_outdoor_256',
17
+ help='path where to save, empty for no saving')
18
+ parser.add_argument('--resume', default='',
19
+ help='resume from checkpoint')
20
+ parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
21
+ help='start epoch')
22
+ parser.add_argument('--num_workers', default=4, type=int)
23
+ parser.add_argument('--image_size', default=256, type=int)
24
+ parser.add_argument('--max_corner_num', default=150, type=int,
25
+ help='the max number of corners allowed in the experiments')
26
+ parser.add_argument('--corner_to_edge_multiplier', default=3, type=int,
27
+ help='the max number of edges based on the number of corner candidates (assuming the '
28
+ 'average degree never greater than 6)')
29
+ parser.add_argument('--lambda_corner', default=0.05, type=float,
30
+ help='weight of the corner-prediction loss term')
31
+ parser.add_argument('--run_validation', action='store_true',
32
+ help='Whether run validation or not, default: False')
33
+ return parser
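
The parser above is meant to be composed into a script-level parser (infer.py below follows the same pattern); a minimal usage sketch:

# Sketch: consume get_args_parser() as a parent parser.
import argparse
from arguments import get_args_parser

parser = argparse.ArgumentParser('HEAT training', parents=[get_args_parser()])
args = parser.parse_args(['--exp_dataset', 's3d_floorplan', '--batch_size', '8'])
print(args.exp_dataset, args.lr, args.lambda_corner)  # s3d_floorplan 0.0002 0.05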
assets/img/pipeline.png ADDED
assets/img/problem_description.png ADDED
datasets/__init__.py ADDED
File without changes
datasets/corners.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from torch.utils.data import Dataset
3
+ from scipy.ndimage import gaussian_filter
4
+ import cv2
5
+
6
+ mean = [0.485, 0.456, 0.406]
7
+ std = [0.229, 0.224, 0.225]
8
+
9
+
10
+ class CornersDataset(Dataset):
11
+ def __init__(self, image_size=256, inference=False):
12
+ super(CornersDataset, self).__init__()
13
+ self.image_size = image_size
14
+ self.inference = inference
15
+ self._data_names = []
16
+
17
+ def __len__(self):
18
+ raise len(self._data_names)
19
+
20
+ def __getitem__(self, idx):
21
+ raise NotImplementedError
22
+
23
+ def process_data(self, data):
24
+ img = data['image']
25
+ corners = data['corners']
26
+ annot = data['annot']
27
+
28
+ # pre-process the image to use ImageNet-pretrained backbones
29
+ img = img.transpose((2, 0, 1))
30
+ raw_img = img.copy()
31
+ img = (img - np.array(mean)[:, np.newaxis, np.newaxis]) / np.array(std)[:, np.newaxis, np.newaxis]
32
+ img = img.astype(np.float32)
33
+
34
+ corners = np.array(corners)
35
+
36
+ all_data = {
37
+ "annot": annot,
38
+ "name": data['name'],
39
+ 'img': img,
40
+ 'annot_path': data['annot_path'],
41
+ 'img_path': data['img_path'],
42
+ 'det_path': data['det_path'],
43
+ 'raw_img': raw_img,
44
+ }
45
+
46
+ # corner labels for training
47
+ if not self.inference:
48
+ pixel_labels, gauss_labels = self.get_corner_labels(corners)
49
+ all_data['pixel_labels'] = pixel_labels
50
+ all_data['gauss_labels'] = gauss_labels
51
+
52
+ return all_data
53
+
54
+ def get_corner_labels(self, corners):
55
+ labels = np.zeros((self.image_size, self.image_size))
56
+ corners = corners.round()
57
+ xint, yint = corners[:, 0].astype(np.int), corners[:, 1].astype(np.int)
58
+ labels[yint, xint] = 1
59
+
60
+ gauss_labels = gaussian_filter(labels, sigma=2)
61
+ gauss_labels = gauss_labels / gauss_labels.max()
62
+ return labels, gauss_labels
63
+
64
+ def resize_data(self, image, annot, det_corners):
65
+ new_image = cv2.resize(image, (self.image_size, self.image_size))
66
+ new_annot = {}
67
+ r = self.image_size / 256
68
+ for c, connections in annot.items():
69
+ new_c = tuple(np.array(c) * r)
70
+ new_connections = [other_c * r for other_c in connections]
71
+ new_annot[new_c] = new_connections
72
+ new_dets = det_corners * r if det_corners is not None else None
73
+ return new_image, new_annot, new_dets
74
+
75
+ def random_aug_annot(self, img, annot, det_corners=None):
76
+ # do random flipping
77
+ img, annot, det_corners = self.random_flip(img, annot, det_corners)
78
+
79
+ # prepare random augmentation parameters (only do random rotation for now)
80
+ theta = np.random.randint(0, 360) / 360 * np.pi * 2
81
+ r = self.image_size / 256
82
+ origin = [127 * r, 127 * r]
83
+ p1_new = [127 * r + 100 * np.sin(theta) * r, 127 * r - 100 * np.cos(theta) * r]
84
+ p2_new = [127 * r + 100 * np.cos(theta) * r, 127 * r + 100 * np.sin(theta) * r]
85
+ p1_old = [127 * r, 127 * r - 100 * r] # y_axis
86
+ p2_old = [127 * r + 100 * r, 127 * r] # x_axis
87
+ pts1 = np.array([origin, p1_old, p2_old]).astype(np.float32)
88
+ pts2 = np.array([origin, p1_new, p2_new]).astype(np.float32)
89
+ M_rot = cv2.getAffineTransform(pts1, pts2)
90
+
91
+ # Combine annotation corners and detection corners
92
+ all_corners = list(annot.keys())
93
+ if det_corners is not None:
94
+ for i in range(det_corners.shape[0]):
95
+ all_corners.append(tuple(det_corners[i]))
96
+ all_corners_ = np.array(all_corners)
97
+
98
+ # Do the corner transform within a big matrix transformation
99
+ corner_mapping = dict()
100
+ ones = np.ones([all_corners_.shape[0], 1])
101
+ all_corners_ = np.concatenate([all_corners_, ones], axis=-1)
102
+ aug_corners = np.matmul(M_rot, all_corners_.T).T
103
+
104
+ for idx, corner in enumerate(all_corners):
105
+ corner_mapping[corner] = aug_corners[idx]
106
+
107
+ # If the transformed geometry goes beyond image boundary, we simply re-do the augmentation
108
+ new_corners = np.array(list(corner_mapping.values()))
109
+ if new_corners.min() <= 0 or new_corners.max() >= (self.image_size - 1):
110
+ # return self.random_aug_annot(img, annot, det_corners)
111
+ return img, annot, None, det_corners
112
+
113
+ # build the new annot dict
114
+ aug_annot = dict()
115
+ for corner, connections in annot.items():
116
+ new_corner = corner_mapping[corner]
117
+ tuple_new_corner = tuple(new_corner)
118
+ aug_annot[tuple_new_corner] = list()
119
+ for to_corner in connections:
120
+ aug_annot[tuple_new_corner].append(corner_mapping[tuple(to_corner)])
121
+
122
+ # Also transform the image correspondingly
123
+ rows, cols, ch = img.shape
124
+ new_img = cv2.warpAffine(img, M_rot, (cols, rows), borderValue=(255, 255, 255))
125
+
126
+ y_start = (new_img.shape[0] - self.image_size) // 2
127
+ x_start = (new_img.shape[1] - self.image_size) // 2
128
+ aug_img = new_img[y_start:y_start + self.image_size, x_start:x_start + self.image_size, :]
129
+
130
+ if det_corners is None:
131
+ return aug_img, aug_annot, corner_mapping, None
132
+ else:
133
+ aug_det_corners = list()
134
+ for corner in det_corners:
135
+ new_corner = corner_mapping[tuple(corner)]
136
+ aug_det_corners.append(new_corner)
137
+ aug_det_corners = np.array(aug_det_corners)
138
+ return aug_img, aug_annot, corner_mapping, aug_det_corners
139
+
140
+ def random_flip(self, img, annot, det_corners):
141
+ height, width, _ = img.shape
142
+ rand_int = np.random.randint(0, 4)
143
+ if rand_int == 0:
144
+ return img, annot, det_corners
145
+
146
+ all_corners = list(annot.keys())
147
+ if det_corners is not None:
148
+ for i in range(det_corners.shape[0]):
149
+ all_corners.append(tuple(det_corners[i]))
150
+ new_corners = np.array(all_corners)
151
+
152
+ if rand_int == 1:
153
+ img = img[:, ::-1, :]
154
+ new_corners[:, 0] = width - new_corners[:, 0]
155
+ elif rand_int == 2:
156
+ img = img[::-1, :, :]
157
+ new_corners[:, 1] = height - new_corners[:, 1]
158
+ else:
159
+ img = img[::-1, ::-1, :]
160
+ new_corners[:, 0] = width - new_corners[:, 0]
161
+ new_corners[:, 1] = height - new_corners[:, 1]
162
+
163
+ new_corners = np.clip(new_corners, 0, self.image_size - 1) # clip into [0, 255]
164
+ corner_mapping = dict()
165
+ for idx, corner in enumerate(all_corners):
166
+ corner_mapping[corner] = new_corners[idx]
167
+
168
+ aug_annot = dict()
169
+ for corner, connections in annot.items():
170
+ new_corner = corner_mapping[corner]
171
+ tuple_new_corner = tuple(new_corner)
172
+ aug_annot[tuple_new_corner] = list()
173
+ for to_corner in connections:
174
+ aug_annot[tuple_new_corner].append(corner_mapping[tuple(to_corner)])
175
+
176
+ if det_corners is not None:
177
+ aug_det_corners = list()
178
+ for corner in det_corners:
179
+ new_corner = corner_mapping[tuple(corner)]
180
+ aug_det_corners.append(new_corner)
181
+ det_corners = np.array(aug_det_corners)
182
+
183
+ return img, aug_annot, det_corners
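
The corner supervision built by get_corner_labels above is a binary hit map smoothed into a peak-normalized Gaussian heatmap; a self-contained sketch of that step on synthetic corners:

# Standalone sketch of get_corner_labels(): mark corner pixels, blur, renormalize.
import numpy as np
from scipy.ndimage import gaussian_filter

image_size = 256
corners = np.array([[40.2, 60.7], [128.0, 128.0]])   # synthetic (x, y) corners

labels = np.zeros((image_size, image_size))
rounded = corners.round()
xint, yint = rounded[:, 0].astype(int), rounded[:, 1].astype(int)
labels[yint, xint] = 1                               # note the (y, x) indexing

gauss_labels = gaussian_filter(labels, sigma=2)
gauss_labels = gauss_labels / gauss_labels.max()     # peak value becomes exactly 1.0
print(gauss_labels.max(), gauss_labels[61, 40] > 0)  # 1.0 True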
datasets/data_utils.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import ImageFilter
2
+ from torchvision import transforms
3
+ import numpy as np
4
+ from utils.nn_utils import positional_encoding_2d
5
+ from torch.utils.data.dataloader import default_collate
6
+
7
+
8
+ def RandomBlur(radius=2.):
9
+ blur = GaussianBlur(radius=radius)
10
+ full_transform = transforms.RandomApply([blur], p=.3)
11
+ return full_transform
12
+
13
+
14
+ class ImageFilterTransform(object):
15
+
16
+ def __init__(self):
17
+ raise NotImplementedError
18
+
19
+ def __call__(self, img):
20
+ return img.filter(self.filter)
21
+
22
+
23
+ class GaussianBlur(ImageFilterTransform):
24
+
25
+ def __init__(self, radius=2.):
26
+ self.filter = ImageFilter.GaussianBlur(radius=radius)
27
+
28
+
29
+ def collate_fn(data):
30
+ batched_data = {}
31
+ for field in data[0].keys():
32
+ if field in ['annot', 'rec_mat']:
33
+ batch_values = [item[field] for item in data]
34
+ else:
35
+ batch_values = default_collate([d[field] for d in data])
36
+ if field in ['pixel_features', 'pixel_labels', 'gauss_labels']:
37
+ batch_values = batch_values.float()
38
+ batched_data[field] = batch_values
39
+
40
+ return batched_data
41
+
42
+
43
+ def get_pixel_features(image_size, d_pe=128):
44
+ all_pe = positional_encoding_2d(d_pe, image_size, image_size)
45
+ pixels_x = np.arange(0, image_size)
46
+ pixels_y = np.arange(0, image_size)
47
+
48
+ xv, yv = np.meshgrid(pixels_x, pixels_y)
49
+ all_pixels = list()
50
+ for i in range(xv.shape[0]):
51
+ pixs = np.stack([xv[i], yv[i]], axis=-1)
52
+ all_pixels.append(pixs)
53
+ pixels = np.stack(all_pixels, axis=0)
54
+
55
+ pixel_features = all_pe[:, pixels[:, :, 1], pixels[:, :, 0]]
56
+ pixel_features = pixel_features.permute(1, 2, 0)
57
+ return pixels, pixel_features
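
get_pixel_features pairs every pixel coordinate with a fixed 2-D positional encoding; a short usage sketch, assuming positional_encoding_2d (from utils.nn_utils, not shown in this view) returns a (d_pe, H, W) torch tensor, as the indexing and .permute above imply:

# Sketch: per-pixel positional encodings consumed by the corner/edge models.
from datasets.data_utils import get_pixel_features

pixels, pixel_features = get_pixel_features(image_size=256, d_pe=128)
print(pixels.shape)          # (256, 256, 2): the (x, y) coordinate of every pixel
print(pixel_features.shape)  # torch.Size([256, 256, 128]): one 128-d code per pixel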
datasets/outdoor_buildings.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from datasets.corners import CornersDataset
3
+ import os
4
+ import skimage
5
+ import cv2
6
+ from torchvision import transforms
7
+ from PIL import Image
8
+ from datasets.data_utils import RandomBlur
9
+
10
+ class OutdoorBuildingDataset(CornersDataset):
11
+ def __init__(self, data_path, det_path, phase='train', image_size=256, rand_aug=True,
12
+ inference=False):
13
+ super(OutdoorBuildingDataset, self).__init__(image_size, inference)
14
+ self.data_path = data_path
15
+ self.det_path = det_path
16
+ self.phase = phase
17
+ self.rand_aug = rand_aug
18
+ self.image_size = image_size
19
+ self.inference = inference
20
+
21
+ blur_transform = RandomBlur()
22
+ self.train_transform = transforms.Compose([
23
+ transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
24
+ transforms.RandomGrayscale(p=0.3),
25
+ blur_transform])
26
+
27
+ if phase == 'train':
28
+ datalistfile = os.path.join(data_path, 'train_list.txt')
29
+ self.training = True
30
+ else:
31
+ datalistfile = os.path.join(data_path, 'valid_list.txt')
32
+ self.training = False
33
+ with open(datalistfile, 'r') as f:
34
+ _data_names = f.readlines()
35
+ if phase == 'train':
36
+ self._data_names = _data_names
37
+ else:
38
+ # based on the data split rule from previous works
39
+ if phase == 'valid':
40
+ self._data_names = _data_names[:50]
41
+ elif phase == 'test':
42
+ self._data_names = _data_names[50:]
43
+ else:
44
+ raise ValueError('Invalid phase {}'.format(phase))
45
+
46
+ def __len__(self):
47
+ return len(self._data_names)
48
+
49
+ def __getitem__(self, idx):
50
+ data_name = self._data_names[idx][:-1]
51
+ annot_path = os.path.join(self.data_path, 'annot', data_name + '.npy')
52
+ annot = np.load(annot_path, allow_pickle=True, encoding='latin1').tolist()
53
+ det_path = os.path.join(self.det_path, data_name + '.npy')
54
+ det_corners = np.array(np.load(det_path, allow_pickle=True)) # [N, 2]
55
+ det_corners = det_corners[:, ::-1] # turn into x,y format
56
+
57
+ img_path = os.path.join(self.data_path, 'rgb', data_name + '.jpg')
58
+ rgb = cv2.imread(img_path)
59
+
60
+ if self.image_size != 256:
61
+ rgb, annot, det_corners = self.resize_data(rgb, annot, det_corners)
62
+
63
+ if self.rand_aug:
64
+ image, annot, corner_mapping, det_corners = self.random_aug_annot(rgb, annot, det_corners=det_corners)
65
+ else:
66
+ image = rgb
67
+ rec_mat = None
68
+
69
+ corners = np.array(list(annot.keys()))[:, [1, 0]]
70
+
71
+ if not self.inference and len(corners) > 100:
72
+ new_idx = np.random.randint(0, len(self))
73
+ return self.__getitem__(new_idx)
74
+
75
+ if self.training:
76
+ # Optionally jitter the g.t. corners (std is 0 here, so the noise is disabled)
77
+ corners += np.random.normal(0, 0, size=corners.shape)
78
+ pil_img = Image.fromarray(image)
79
+ image = self.train_transform(pil_img)
80
+ image = np.array(image)
81
+ image = skimage.img_as_float(image)
82
+
83
+ # sort by the second value and then the first value, here the corners are in the format of (y, x)
84
+ sort_idx = np.lexsort(corners.T)
85
+ corners = corners[sort_idx]
86
+
87
+ corner_list = []
88
+ for corner_i in range(corners.shape[0]):
89
+ corner_list.append((corners[corner_i][1], corners[corner_i][0])) # to (x, y) format
90
+
91
+ raw_data = {
92
+ 'name': data_name,
93
+ 'corners': corner_list,
94
+ 'annot': annot,
95
+ 'image': image,
96
+ 'rec_mat': rec_mat,
97
+ 'annot_path': annot_path,
98
+ 'det_path': det_path,
99
+ 'img_path': img_path,
100
+ }
101
+
102
+ return self.process_data(raw_data)
103
+
104
+ def random_aug_annot(self, img, annot, det_corners=None):
105
+ # do random flipping
106
+ img, annot, det_corners = self.random_flip(img, annot, det_corners)
107
+
108
+ # prepare random augmentation parameters (only do random rotation for now)
109
+ theta = np.random.randint(0, 360) / 360 * np.pi * 2
110
+ r = self.image_size / 256
111
+ origin = [127 * r, 127 * r]
112
+ p1_new = [127 * r + 100 * np.sin(theta) * r, 127 * r - 100 * np.cos(theta) * r]
113
+ p2_new = [127 * r + 100 * np.cos(theta) * r, 127 * r + 100 * np.sin(theta) * r]
114
+ p1_old = [127 * r, 127 * r - 100 * r] # y_axis
115
+ p2_old = [127 * r + 100 * r, 127 * r] # x_axis
116
+ pts1 = np.array([origin, p1_old, p2_old]).astype(np.float32)
117
+ pts2 = np.array([origin, p1_new, p2_new]).astype(np.float32)
118
+ M_rot = cv2.getAffineTransform(pts1, pts2)
119
+
120
+ # Combine annotation corners and detection corners
121
+ all_corners = list(annot.keys())
122
+ if det_corners is not None:
123
+ for i in range(det_corners.shape[0]):
124
+ all_corners.append(tuple(det_corners[i]))
125
+ all_corners_ = np.array(all_corners)
126
+
127
+ # Do the corner transform within a big matrix transformation
128
+ corner_mapping = dict()
129
+ ones = np.ones([all_corners_.shape[0], 1])
130
+ all_corners_ = np.concatenate([all_corners_, ones], axis=-1)
131
+ aug_corners = np.matmul(M_rot, all_corners_.T).T
132
+
133
+ for idx, corner in enumerate(all_corners):
134
+ corner_mapping[corner] = aug_corners[idx]
135
+
136
+ # If the transformed geometry goes beyond image boundary, we simply re-do the augmentation
137
+ new_corners = np.array(list(corner_mapping.values()))
138
+ if new_corners.min() <= 0 or new_corners.max() >= (self.image_size - 1):
139
+ # return self.random_aug_annot(img, annot, det_corners)
140
+ return img, annot, None, det_corners
141
+
142
+ # build the new annot dict
143
+ aug_annot = dict()
144
+ for corner, connections in annot.items():
145
+ new_corner = corner_mapping[corner]
146
+ tuple_new_corner = tuple(new_corner)
147
+ aug_annot[tuple_new_corner] = list()
148
+ for to_corner in connections:
149
+ aug_annot[tuple_new_corner].append(corner_mapping[tuple(to_corner)])
150
+
151
+ # Also transform the image correspondingly
152
+ rows, cols, ch = img.shape
153
+ new_img = cv2.warpAffine(img, M_rot, (cols, rows), borderValue=(255, 255, 255))
154
+
155
+ y_start = (new_img.shape[0] - self.image_size) // 2
156
+ x_start = (new_img.shape[1] - self.image_size) // 2
157
+ aug_img = new_img[y_start:y_start + self.image_size, x_start:x_start + self.image_size, :]
158
+
159
+ if det_corners is None:
160
+ return aug_img, aug_annot, corner_mapping, None
161
+ else:
162
+ aug_det_corners = list()
163
+ for corner in det_corners:
164
+ new_corner = corner_mapping[tuple(corner)]
165
+ aug_det_corners.append(new_corner)
166
+ aug_det_corners = np.array(aug_det_corners)
167
+ return aug_img, aug_annot, corner_mapping, aug_det_corners
168
+
169
+
170
+
171
+ if __name__ == '__main__':
172
+ from torch.utils.data import DataLoader
+ from datasets.data_utils import collate_fn
173
+
174
+ DATAPATH = './data/cities_dataset'
175
+ DET_PATH = './data/det_final'
176
+ train_dataset = OutdoorBuildingDataset(DATAPATH, DET_PATH, phase='train')
177
+ train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0,
178
+ collate_fn=collate_fn)
179
+ for i, item in enumerate(train_dataloader):
180
+ import pdb; pdb.set_trace()
183
+ print(item)
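
The lexsort call in __getitem__ above orders the (y, x) corner rows by their last key first, i.e. by x and then by y, which is exactly what the in-code comment describes; a tiny worked example:

# Worked example of the corner-ordering convention used above.
import numpy as np

corners = np.array([[5, 9], [5, 2], [1, 7]])  # (y, x) rows
sort_idx = np.lexsort(corners.T)              # last key (the x column) is primary
print(corners[sort_idx])                      # [[5 2] [1 7] [5 9]]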
datasets/s3d_floorplans.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from datasets.corners import CornersDataset
3
+ import os
4
+ import skimage
5
+ import cv2
6
+ import itertools
7
+
8
+
9
+ mean = [0.485, 0.456, 0.406]
10
+ std = [0.229, 0.224, 0.225]
11
+
12
+ all_combinations = dict()
13
+ for length in range(2, 351):
14
+ ids = np.arange(length)
15
+ combs = np.array(list(itertools.combinations(ids, 2)))
16
+ all_combibations[length] = combs
17
+
18
+
19
+ class S3DFloorplanDataset(CornersDataset):
20
+ def __init__(self, data_path, phase='train', image_size=256, rand_aug=True, inference=False):
21
+ super(S3DFloorplanDataset, self).__init__(image_size, inference)
22
+ self.data_path = data_path
23
+ self.phase = phase
24
+ self.rand_aug = rand_aug
25
+
26
+ if phase == 'train':
27
+ datalistfile = os.path.join(data_path, 'train_list.txt')
28
+ self.training = True
29
+ elif phase == 'valid':
30
+ datalistfile = os.path.join(data_path, 'valid_list.txt')
31
+ self.training = False
32
+ else:
33
+ datalistfile = os.path.join(data_path, 'test_list.txt')
34
+ self.training = False
35
+ with open(datalistfile, 'r') as f:
36
+ self._data_names = f.readlines()
37
+
38
+ def __len__(self):
39
+ return len(self._data_names)
40
+
41
+ def __getitem__(self, idx):
42
+ data_name = self._data_names[idx][:-1]
43
+ annot_path = os.path.join(self.data_path, 'annot', data_name + '.npy')
44
+ annot = np.load(annot_path, allow_pickle=True, encoding='latin1').tolist()
45
+
46
+ density_path = os.path.join(self.data_path, 'density', data_name + '.png')
47
+ normal_path = os.path.join(self.data_path, 'normals', data_name + '.png')
48
+
49
+ density = cv2.imread(density_path)
50
+ normal = cv2.imread(normal_path)
51
+ rgb = np.maximum(density, normal)
52
+
53
+ if self.image_size != 256:
54
+ rgb, annot, det_corners = self.resize_data(rgb, annot, None)
55
+
56
+ if self.rand_aug:
57
+ image, annot, _ = self.random_aug_annot(rgb, annot, det_corners=None)
58
+ else:
59
+ image = rgb
60
+ rec_mat = None
61
+
62
+ corners = np.array(list(annot.keys()))[:, [1, 0]]
63
+
64
+ if not self.inference and len(corners) > 150:
65
+ new_idx = np.random.randint(0, len(self))
66
+ return self.__getitem__(new_idx)
67
+
68
+ if self.training:
69
+ # Optionally jitter the g.t. corners (std is 0 here, so the noise is disabled)
70
+ corners += np.random.normal(0, 0, size=corners.shape)
71
+
72
+ image = skimage.img_as_float(image)
73
+
74
+ # sort by the second value and then the first value, here the corners are in the format of (y, x)
75
+ sort_idx = np.lexsort(corners.T)
76
+ corners = corners[sort_idx]
77
+
78
+ corner_list = []
79
+ for corner_i in range(corners.shape[0]):
80
+ corner_list.append((corners[corner_i][1], corners[corner_i][0])) # to (x, y) format
81
+
82
+ raw_data = {
83
+ 'name': data_name,
84
+ 'corners': corner_list,
85
+ 'annot': annot,
86
+ 'image': image,
87
+ 'rec_mat': rec_mat,
88
+ 'annot_path': annot_path,
89
+ 'img_path': density_path,
90
+ }
91
+
92
+ return self.process_data(raw_data)
93
+
94
+ def process_data(self, data):
95
+ img = data['image']
96
+ corners = data['corners']
97
+ annot = data['annot']
98
+
99
+ # pre-process the image to use ImageNet-pretrained backbones
100
+ img = img.transpose((2, 0, 1))
101
+ raw_img = img.copy()
102
+ img = (img - np.array(mean)[:, np.newaxis, np.newaxis]) / np.array(std)[:, np.newaxis, np.newaxis]
103
+ img = img.astype(np.float32)
104
+
105
+ corners = np.array(corners)
106
+
107
+ all_data = {
108
+ "annot": annot,
109
+ "name": data['name'],
110
+ 'img': img,
111
+ 'annot_path': data['annot_path'],
112
+ 'img_path': data['img_path'],
113
+ 'raw_img': raw_img,
114
+ }
115
+
116
+ # corner labels
117
+ if not self.inference:
118
+ pixel_labels, gauss_labels = self.get_corner_labels(corners)
119
+ all_data['pixel_labels'] = pixel_labels
120
+ all_data['gauss_labels'] = gauss_labels
121
+
122
+ return all_data
123
+
124
+ def random_aug_annot(self, img, annot, det_corners=None):
125
+ # do random flipping
126
+ img, annot, det_corners = self.random_flip(img, annot, det_corners)
127
+ # return img, annot, None
128
+
129
+ # prepare random augmentation parameters (only do random rotation for now)
130
+ theta = np.random.randint(0, 360) / 360 * np.pi * 2
131
+ r = self.image_size / 256
132
+ origin = [127 * r, 127 * r]
133
+ p1_new = [127 * r + 100 * np.sin(theta) * r, 127 * r - 100 * np.cos(theta) * r]
134
+ p2_new = [127 * r + 100 * np.cos(theta) * r, 127 * r + 100 * np.sin(theta) * r]
135
+ p1_old = [127 * r, 127 * r - 100 * r] # y_axis
136
+ p2_old = [127 * r + 100 * r, 127 * r] # x_axis
137
+ pts1 = np.array([origin, p1_old, p2_old]).astype(np.float32)
138
+ pts2 = np.array([origin, p1_new, p2_new]).astype(np.float32)
139
+ M_rot = cv2.getAffineTransform(pts1, pts2)
140
+
141
+ # Combine annotation corners and detection corners
142
+ all_corners = list(annot.keys())
143
+ if det_corners is not None:
144
+ for i in range(det_corners.shape[0]):
145
+ all_corners.append(tuple(det_corners[i]))
146
+ all_corners_ = np.array(all_corners)
147
+
148
+ # Do the per-corner transform
149
+ # Done in a big matrix transformation to save processing time.
150
+ corner_mapping = dict()
151
+ ones = np.ones([all_corners_.shape[0], 1])
152
+ all_corners_ = np.concatenate([all_corners_, ones], axis=-1)
153
+ aug_corners = np.matmul(M_rot, all_corners_.T).T
154
+
155
+ for idx, corner in enumerate(all_corners):
156
+ corner_mapping[corner] = aug_corners[idx]
157
+
158
+ # If the transformed geometry goes beyond image boundary, we simply re-do the augmentation
159
+ new_corners = np.array(list(corner_mapping.values()))
160
+ if new_corners.min() <= 0 or new_corners.max() >= (self.image_size - 1):
161
+ # return self.random_aug_annot(img, annot, det_corners)
162
+ return img, annot, None
163
+
164
+ # build the new annot dict
165
+ aug_annot = dict()
166
+ for corner, connections in annot.items():
167
+ new_corner = corner_mapping[corner]
168
+ tuple_new_corner = tuple(new_corner)
169
+ aug_annot[tuple_new_corner] = list()
170
+ for to_corner in connections:
171
+ aug_annot[tuple_new_corner].append(corner_mapping[tuple(to_corner)])
172
+
173
+ # Also transform the image correspondingly
174
+ rows, cols, ch = img.shape
175
+ new_img = cv2.warpAffine(img, M_rot, (cols, rows), borderValue=(255, 255, 255))
176
+
177
+ y_start = (new_img.shape[0] - self.image_size) // 2
178
+ x_start = (new_img.shape[1] - self.image_size) // 2
179
+ aug_img = new_img[y_start:y_start + self.image_size, x_start:x_start + self.image_size, :]
180
+
181
+ return aug_img, aug_annot, None
182
+
183
+
184
+
185
+
186
+
187
+
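
The network input built in __getitem__ above is a per-pixel maximum over the density and normal renderings; a small sketch with synthetic arrays:

# Sketch of the density/normal fusion (synthetic images instead of real renderings).
import numpy as np

density = np.zeros((256, 256, 3), dtype=np.uint8)
normal = np.zeros((256, 256, 3), dtype=np.uint8)
density[100:120, 100:120] = 200     # fake point-density blob
normal[110:130, 110:130] = 150      # fake normal rendering
rgb = np.maximum(density, normal)   # brighter source wins per pixel and channel
print(rgb[115, 115], rgb[125, 125]) # [200 200 200] [150 150 150]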
images/test.jpg ADDED
infer.py ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.utils.data import DataLoader
4
+ from datasets.outdoor_buildings import OutdoorBuildingDataset
5
+ from datasets.s3d_floorplans import S3DFloorplanDataset
6
+ from datasets.data_utils import collate_fn, get_pixel_features
7
+ from models.resnet import ResNetBackbone
8
+ from models.corner_models import HeatCorner
9
+ from models.edge_models import HeatEdge
10
+ from models.corner_to_edge import get_infer_edge_pairs
11
+ from utils.geometry_utils import corner_eval
12
+ import numpy as np
13
+ import cv2
14
+ import os
15
+ import scipy.ndimage as filters  # the filters submodule is deprecated; the functions live at the top level
16
+ import matplotlib.pyplot as plt
17
+ from metrics.get_metric import compute_metrics, get_recall_and_precision
18
+ import skimage
19
+ import argparse
20
+
21
+
22
+ def visualize_cond_generation(positive_pixels, confs, image, save_path, gt_corners=None, prec=None, recall=None,
23
+ image_masks=None, edges=None, edge_confs=None):
24
+ image = image.copy() # get a new copy of the original image
25
+ if confs is not None:
26
+ viz_confs = confs
27
+
28
+ if edges is not None:
29
+ preds = positive_pixels.astype(int)
30
+ c_degrees = dict()
31
+ for edge_i, edge_pair in enumerate(edges):
32
+ conf = (edge_confs[edge_i] * 2) - 1
33
+ cv2.line(image, tuple(preds[edge_pair[0]]), tuple(preds[edge_pair[1]]), (255 * conf, 255 * conf, 0), 2)
34
+ c_degrees[edge_pair[0]] = c_degrees.setdefault(edge_pair[0], 0) + 1
35
+ c_degrees[edge_pair[1]] = c_degrees.setdefault(edge_pair[1], 0) + 1
36
+
37
+ for idx, c in enumerate(positive_pixels):
38
+ if edges is not None and idx not in c_degrees:
39
+ continue
40
+ if confs is None:
41
+ cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 0, 255), -1)
42
+ else:
43
+ cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 0, 255 * viz_confs[idx]), -1)
44
+ # if edges is not None:
45
+ # cv2.putText(image, '{}'.format(c_degrees[idx]), (int(c[0]), int(c[1] - 5)), cv2.FONT_HERSHEY_SIMPLEX,
46
+ # 0.5, (255, 0, 0), 1, cv2.LINE_AA)
47
+
48
+ if gt_corners is not None:
49
+ for c in gt_corners:
50
+ cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 255, 0), -1)
51
+
52
+ if image_masks is not None:
53
+ mask_ids = np.where(image_masks == 1)[0]
54
+ for mask_id in mask_ids:
55
+ y_idx = mask_id // 64
56
+ x_idx = (mask_id - y_idx * 64)
57
+ x_coord = x_idx * 4
58
+ y_coord = y_idx * 4
59
+ cv2.rectangle(image, (x_coord, y_coord), (x_coord + 3, y_coord + 3), (127, 127, 0), thickness=-1)
60
+
61
+ # if confs is not None:
62
+ # cv2.putText(image, 'max conf: {:.3f}'.format(confs.max()), (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
63
+ # 0.5, (255, 255, 0), 1, cv2.LINE_AA)
64
+ if prec is not None:
65
+ if isinstance(prec, tuple):
66
+ cv2.putText(image, 'edge p={:.2f}, edge r={:.2f}'.format(prec[0], recall[0]), (20, 20),
67
+ cv2.FONT_HERSHEY_SIMPLEX,
68
+ 0.5, (255, 255, 0), 1, cv2.LINE_AA)
69
+ cv2.putText(image, 'region p={:.2f}, region r={:.2f}'.format(prec[1], recall[1]), (20, 40),
70
+ cv2.FONT_HERSHEY_SIMPLEX,
71
+ 0.5, (255, 255, 0), 1, cv2.LINE_AA)
72
+ else:
73
+ cv2.putText(image, 'prec={:.2f}, recall={:.2f}'.format(prec, recall), (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
74
+ 0.5, (255, 255, 0), 1, cv2.LINE_AA)
75
+ cv2.imwrite(save_path, image)
76
+
77
+
78
+ def corner_nms(preds, confs, image_size):
79
+ data = np.zeros([image_size, image_size])
80
+ neighborhood_size = 5
81
+ threshold = 0
82
+
83
+ for i in range(len(preds)):
84
+ data[preds[i, 1], preds[i, 0]] = confs[i]
85
+
86
+ data_max = filters.maximum_filter(data, neighborhood_size)
87
+ maxima = (data == data_max)
88
+ data_min = filters.minimum_filter(data, neighborhood_size)
89
+ diff = ((data_max - data_min) > threshold)
90
+ maxima[diff == 0] = 0
91
+
92
+ results = np.where(maxima > 0)
93
+ filtered_preds = np.stack([results[1], results[0]], axis=-1)
94
+
95
+ new_confs = list()
96
+ for i, pred in enumerate(filtered_preds):
97
+ new_confs.append(data[pred[1], pred[0]])
98
+ new_confs = np.array(new_confs)
99
+
100
+ return filtered_preds, new_confs
101
+
102
+
103
+ def main(dataset, ckpt_path, image_size, viz_base, save_base, infer_times):
104
+ ckpt = torch.load(ckpt_path)
105
+ print('Load from ckpts of epoch {}'.format(ckpt['epoch']))
106
+ ckpt_args = ckpt['args']
107
+ if dataset == 'outdoor':
108
+ data_path = './data/outdoor/cities_dataset'
109
+ det_path = './data/outdoor/det_final'
110
+ test_dataset = OutdoorBuildingDataset(data_path, det_path, phase='test', image_size=image_size, rand_aug=False,
111
+ inference=True)
112
+ elif dataset == 's3d_floorplan':
113
+ data_path = './data/s3d_floorplan'
114
+ test_dataset = S3DFloorplanDataset(data_path, phase='test', rand_aug=False, inference=True)
115
+ else:
116
+ raise ValueError('Unknown dataset type: {}'.format(dataset))
117
+
118
+ test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0,
119
+ collate_fn=collate_fn)
120
+
121
+ backbone = ResNetBackbone()
122
+ strides = backbone.strides
123
+ num_channels = backbone.num_channels
124
+ backbone = nn.DataParallel(backbone)
125
+ backbone = backbone.cuda()
126
+ backbone.eval()
127
+ corner_model = HeatCorner(input_dim=128, hidden_dim=256, num_feature_levels=4, backbone_strides=strides,
128
+ backbone_num_channels=num_channels)
129
+ corner_model = nn.DataParallel(corner_model)
130
+ corner_model = corner_model.cuda()
131
+ corner_model.eval()
132
+
133
+ edge_model = HeatEdge(input_dim=128, hidden_dim=256, num_feature_levels=4, backbone_strides=strides,
134
+ backbone_num_channels=num_channels)
135
+ edge_model = nn.DataParallel(edge_model)
136
+ edge_model = edge_model.cuda()
137
+ edge_model.eval()
138
+
139
+ backbone.load_state_dict(ckpt['backbone'])
140
+ corner_model.load_state_dict(ckpt['corner_model'])
141
+ edge_model.load_state_dict(ckpt['edge_model'])
142
+ print('Loaded saved model from {}'.format(ckpt_path))
143
+
144
+ if not os.path.exists(viz_base):
145
+ os.makedirs(viz_base)
146
+ if not os.path.exists(save_base):
147
+ os.makedirs(save_base)
148
+
149
+ all_prec = list()
150
+ all_recall = list()
151
+
152
+ corner_tp = 0.0
153
+ corner_fp = 0.0
154
+ corner_length = 0.0
155
+ edge_tp = 0.0
156
+ edge_fp = 0.0
157
+ edge_length = 0.0
158
+ region_tp = 0.0
159
+ region_fp = 0.0
160
+ region_length = 0.0
161
+
162
+ # get the positional encodings for all pixels
163
+ pixels, pixel_features = get_pixel_features(image_size=image_size)
164
+
165
+ for data_i, data in enumerate(test_dataloader):
166
+ image = data['img'].cuda()
167
+ img_path = data['img_path'][0]
168
+ annot_path = data['annot_path'][0]
169
+ annot = np.load(annot_path, allow_pickle=True, encoding='latin1').tolist()
170
+
171
+ with torch.no_grad():
172
+ pred_corners, pred_confs, pos_edges, edge_confs, c_outputs_np = get_results(image, annot, backbone,
173
+ corner_model,
174
+ edge_model,
175
+ pixels, pixel_features,
176
+ ckpt_args, infer_times,
177
+ corner_thresh=0.01,
178
+ image_size=image_size)
179
+
180
+ # viz_image = cv2.imread(img_path)
181
+ positive_pixels = np.array(list(annot.keys())).round()
182
+
183
+ viz_image = data['raw_img'][0].cpu().numpy().transpose(1, 2, 0)
184
+ viz_image = (viz_image * 255).astype(np.uint8)
185
+
186
+ # visualize G.T.
187
+ gt_path = os.path.join(viz_base, '{}_gt.png'.format(data_i))
188
+ visualize_cond_generation(positive_pixels, None, viz_image, gt_path, gt_corners=None, image_masks=None)
189
+
190
+ if len(pred_corners) > 0:
191
+ prec, recall = corner_eval(positive_pixels, pred_corners)
192
+ else:
193
+ prec = recall = 0
194
+ all_prec.append(prec)
195
+ all_recall.append(recall)
196
+
197
+ if pred_confs.shape[0] == 0:
198
+ pred_confs = None
199
+
200
+ if image_size != 256:
201
+ pred_corners_viz = pred_corners * (image_size / 256)
202
+ else:
203
+ pred_corners_viz = pred_corners
204
+ recon_path = os.path.join(viz_base, '{}_pred_corner.png'.format(data_i))
205
+ visualize_cond_generation(pred_corners_viz, pred_confs, viz_image, recon_path, gt_corners=None, prec=prec,
206
+ recall=recall)
207
+
208
+ pred_corners, pred_confs, pos_edges = postprocess_preds(pred_corners, pred_confs, pos_edges)
209
+
210
+ pred_data = {
211
+ 'corners': pred_corners,
212
+ 'edges': pos_edges,
213
+ }
214
+
215
+ if dataset == 's3d_floorplan':
216
+ save_filename = os.path.basename(annot_path)
217
+ save_npy_path = os.path.join(save_base, save_filename)
218
+ np.save(save_npy_path, pred_data)
219
+ else:
220
+ save_results = {
221
+ 'corners': pred_corners,
222
+ 'edges': pos_edges,
223
+ 'image_path': img_path,
224
+ }
225
+ save_path = os.path.join(save_base, '{}_results.npy'.format(data_i))
226
+ np.save(save_path, save_results)
227
+
228
+ gt_data = convert_annot(annot)
229
+
230
+ score = compute_metrics(gt_data, pred_data)
231
+
232
+ edge_recall, edge_prec = get_recall_and_precision(score['edge_tp'], score['edge_fp'], score['edge_length'])
233
+ region_recall, region_prec = get_recall_and_precision(score['region_tp'], score['region_fp'],
234
+ score['region_length'])
235
+ er_recall = (edge_recall, region_recall)
236
+ er_prec = (edge_prec, region_prec)
237
+
238
+ if image_size != 256:
239
+ pred_corners_viz = pred_corners * (image_size / 256)
240
+ else:
241
+ pred_corners_viz = pred_corners
242
+ recon_path = os.path.join(viz_base, '{}_pred_edge.png'.format(data_i))
243
+ visualize_cond_generation(pred_corners_viz, pred_confs, viz_image, recon_path, gt_corners=None, prec=er_prec,
244
+ recall=er_recall, edges=pos_edges, edge_confs=edge_confs)
245
+ corner_tp += score['corner_tp']
246
+ corner_fp += score['corner_fp']
247
+ corner_length += score['corner_length']
248
+ edge_tp += score['edge_tp']
249
+ edge_fp += score['edge_fp']
250
+ edge_length += score['edge_length']
251
+ region_tp += score['region_tp']
252
+ region_fp += score['region_fp']
253
+ region_length += score['region_length']
254
+
255
+ print('Finish inference for sample No.{}'.format(data_i))
256
+ avg_prec = np.array(all_prec).mean()
257
+ avg_recall = np.array(all_recall).mean()
258
+
259
+ recall, precision = get_recall_and_precision(corner_tp, corner_fp, corner_length)
260
+ f_score = 2.0 * precision * recall / (recall + precision + 1e-8)
261
+ print('corners - precision: %.3f recall: %.3f f_score: %.3f' % (precision, recall, f_score))
262
+
263
+ # edge
264
+ recall, precision = get_recall_and_precision(edge_tp, edge_fp, edge_length)
265
+ f_score = 2.0 * precision * recall / (recall + precision + 1e-8)
266
+ print('edges - precision: %.3f recall: %.3f f_score: %.3f' % (precision, recall, f_score))
267
+
268
+ # region
269
+ recall, precision = get_recall_and_precision(region_tp, region_fp, region_length)
270
+ f_score = 2.0 * precision * recall / (recall + precision + 1e-8)
271
+ print('regions - precision: %.3f recall: %.3f f_score: %.3f' % (precision, recall, f_score))
272
+
273
+ print('Avg prec: {}, Avg recall: {}'.format(avg_prec, avg_recall))
274
+
275
+
276
+ def get_results(image, annot, backbone, corner_model, edge_model, pixels, pixel_features,
277
+ args, infer_times, corner_thresh=0.5, image_size=256):
278
+ image_feats, feat_mask, all_image_feats = backbone(image)
279
+ pixel_features = pixel_features.unsqueeze(0).repeat(image.shape[0], 1, 1, 1)
280
+ preds_s1 = corner_model(image_feats, feat_mask, pixel_features, pixels, all_image_feats)
281
+
282
+ c_outputs = preds_s1
283
+ # get predicted corners
284
+ c_outputs_np = c_outputs[0].detach().cpu().numpy()
285
+ pos_indices = np.where(c_outputs_np >= corner_thresh)
286
+ pred_corners = pixels[pos_indices]
287
+ pred_confs = c_outputs_np[pos_indices]
288
+ pred_corners, pred_confs = corner_nms(pred_corners, pred_confs, image_size=c_outputs.shape[1])
289
+
290
+ pred_corners, pred_confs, edge_coords, edge_mask, edge_ids = get_infer_edge_pairs(pred_corners, pred_confs)
291
+
292
+ corner_nums = torch.tensor([len(pred_corners)]).to(image.device)
293
+ max_candidates = torch.stack([corner_nums.max() * args.corner_to_edge_multiplier] * len(corner_nums), dim=0)
294
+
295
+ all_pos_ids = set()
296
+ all_edge_confs = dict()
297
+
298
+ for tt in range(infer_times):
299
+ if tt == 0:
300
+ gt_values = torch.zeros_like(edge_mask).long()
301
+ gt_values[:, :] = 2
302
+
303
+ # run the edge model
304
+ s1_logits, s2_logits_hb, s2_logits_rel, selected_ids, s2_mask, s2_gt_values = edge_model(image_feats, feat_mask,
305
+ pixel_features,
306
+ edge_coords, edge_mask,
307
+ gt_values, corner_nums,
308
+ max_candidates,
309
+ True)
310
+ # do_inference=True)
311
+
312
+ num_total = s1_logits.shape[2]
313
+ num_selected = selected_ids.shape[1]
314
+ num_filtered = num_total - num_selected
315
+
316
+ s1_preds = s1_logits.squeeze().softmax(0)
317
+ s2_preds_rel = s2_logits_rel.squeeze().softmax(0)
318
+ s2_preds_hb = s2_logits_hb.squeeze().softmax(0)
319
+ s1_preds_np = s1_preds[1, :].detach().cpu().numpy()
320
+ s2_preds_rel_np = s2_preds_rel[1, :].detach().cpu().numpy()
321
+ s2_preds_hb_np = s2_preds_hb[1, :].detach().cpu().numpy()
322
+
323
+ selected_ids = selected_ids.squeeze().detach().cpu().numpy()
324
+ if tt != infer_times - 1:
325
+ s2_preds_np = s2_preds_hb_np
326
+
327
+ pos_edge_ids = np.where(s2_preds_np >= 0.9)
328
+ neg_edge_ids = np.where(s2_preds_np <= 0.01)
329
+ for pos_id in pos_edge_ids[0]:
330
+ actual_id = selected_ids[pos_id]
331
+ if gt_values[0, actual_id] != 2:
332
+ continue
333
+ all_pos_ids.add(actual_id)
334
+ all_edge_confs[actual_id] = s2_preds_np[pos_id]
335
+ gt_values[0, actual_id] = 1
336
+ for neg_id in neg_edge_ids[0]:
337
+ actual_id = selected_ids[neg_id]
338
+ if gt_values[0, actual_id] != 2:
339
+ continue
340
+ gt_values[0, actual_id] = 0
341
+ num_to_pred = (gt_values == 2).sum()
342
+ if num_to_pred <= num_filtered:
343
+ break
344
+ else:
345
+ s2_preds_np = s2_preds_hb_np
346
+
347
+ pos_edge_ids = np.where(s2_preds_np >= 0.5)
348
+ for pos_id in pos_edge_ids[0]:
349
+ actual_id = selected_ids[pos_id]
350
+ if bool(s2_mask[0][pos_id]) or gt_values[0, actual_id] != 2:  # 'is True' on a tensor is always False
351
+ continue
352
+ all_pos_ids.add(actual_id)
353
+ all_edge_confs[actual_id] = s2_preds_np[pos_id]
354
+
355
+ # print('Inference time {}'.format(tt+1))
356
+ pos_edge_ids = list(all_pos_ids)
357
+ edge_confs = [all_edge_confs[idx] for idx in pos_edge_ids]
358
+ pos_edges = edge_ids[pos_edge_ids].cpu().numpy()
359
+ edge_confs = np.array(edge_confs)
360
+
361
+ if image_size != 256:
362
+ pred_corners = pred_corners / (image_size / 256)
363
+
364
+ return pred_corners, pred_confs, pos_edges, edge_confs, c_outputs_np
365
+
366
+
367
+ def postprocess_preds(corners, confs, edges):
368
+ corner_degrees = dict()
369
+ for edge_i, edge_pair in enumerate(edges):
370
+ corner_degrees[edge_pair[0]] = corner_degrees.setdefault(edge_pair[0], 0) + 1
371
+ corner_degrees[edge_pair[1]] = corner_degrees.setdefault(edge_pair[1], 0) + 1
372
+ good_ids = [i for i in range(len(corners)) if i in corner_degrees]
373
+ if len(good_ids) == len(corners):
374
+ return corners, confs, edges
375
+ else:
376
+ good_corners = corners[good_ids]
377
+ good_confs = confs[good_ids]
378
+ id_mapping = {value: idx for idx, value in enumerate(good_ids)}
379
+ new_edges = list()
380
+ for edge_pair in edges:
381
+ new_pair = (id_mapping[edge_pair[0]], id_mapping[edge_pair[1]])
382
+ new_edges.append(new_pair)
383
+ new_edges = np.array(new_edges)
384
+ return good_corners, good_confs, new_edges
385
+
386
+
387
+ def process_image(img):
388
+ mean = [0.485, 0.456, 0.406]
389
+ std = [0.229, 0.224, 0.225]
390
+ img = skimage.img_as_float(img)
391
+ img = img.transpose((2, 0, 1))
392
+ img = (img - np.array(mean)[:, np.newaxis, np.newaxis]) / np.array(std)[:, np.newaxis, np.newaxis]
393
+ img = torch.Tensor(img).cuda()
394
+ img = img.unsqueeze(0)
395
+ return img
396
+
397
+
398
+ def plot_heatmap(results, filename):
399
+ # generate 2 2d grids for the x & y bounds
400
+ # import pdb; pdb.set_trace()
401
+ y, x = np.meshgrid(np.linspace(0, 255, 256), np.linspace(0, 255, 256))
402
+
403
+ z = results[::-1, :]
404
+ # x and y are bounds, so z should be the value *inside* those bounds.
405
+ # Therefore, remove the last value from the z array.
406
+ z = z[:-1, :-1]
407
+
408
+ fig, ax = plt.subplots()
409
+
410
+ c = ax.pcolormesh(y, x, z, cmap='RdBu', vmin=0, vmax=1)
411
+ # set the limits of the plot to the limits of the data
412
+ ax.axis([x.min(), x.max(), y.min(), y.max()])
413
+ fig.colorbar(c, ax=ax)
414
+ fig.savefig(filename)
415
+ plt.close()
416
+
417
+
418
+ def convert_annot(annot):
419
+ corners = np.array(list(annot.keys()))
420
+ corners_mapping = {tuple(c): idx for idx, c in enumerate(corners)}
421
+ edges = set()
422
+ for corner, connections in annot.items():
423
+ idx_c = corners_mapping[tuple(corner)]
424
+ for other_c in connections:
425
+ idx_other_c = corners_mapping[tuple(other_c)]
426
+ if (idx_c, idx_other_c) not in edges and (idx_other_c, idx_c) not in edges:
427
+ edges.add((idx_c, idx_other_c))
428
+ edges = np.array(list(edges))
429
+ gt_data = {
430
+ 'corners': corners,
431
+ 'edges': edges
432
+ }
433
+ return gt_data
434
+
435
+
436
+ def get_args_parser():
437
+ parser = argparse.ArgumentParser('Holistic edge attention transformer', add_help=False)
438
+ parser.add_argument('--dataset', default='outdoor',
439
+ help='the dataset for experiments, outdoor/s3d_floorplan')
440
+ parser.add_argument('--checkpoint_path', default='',
441
+ help='path to the checkpoints of the model')
442
+ parser.add_argument('--image_size', default=256, type=int)
443
+ parser.add_argument('--viz_base', default='./results/viz',
444
+ help='path to save the intermediate visualizations')
445
+ parser.add_argument('--save_base', default='./results/npy',
446
+ help='path to save the prediction results in npy files')
447
+ parser.add_argument('--infer_times', default=3, type=int)
448
+ return parser
449
+
450
+
451
+ if __name__ == '__main__':
452
+ parser = argparse.ArgumentParser('HEAT inference', parents=[get_args_parser()])
453
+ args = parser.parse_args()
454
+ main(args.dataset, args.checkpoint_path, args.image_size, args.viz_base, args.save_base,
455
+ infer_times=args.infer_times)
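
corner_nms above keeps a detection only if it is the confidence maximum within its 5-pixel neighborhood; a self-contained demo on synthetic detections (using a simple positivity check in place of the min/max-difference test):

# Demo of local-maximum corner NMS, mirroring corner_nms() above.
import numpy as np
import scipy.ndimage as ndi

preds = np.array([[50, 50], [52, 50], [120, 80]])  # (x, y) detections
confs = np.array([0.9, 0.6, 0.8])

data = np.zeros((256, 256))
data[preds[:, 1], preds[:, 0]] = confs             # confidence map, indexed (y, x)
data_max = ndi.maximum_filter(data, size=5)
maxima = (data == data_max) & (data > 0)           # keep only positive local peaks
ys, xs = np.where(maxima)
print(np.stack([xs, ys], axis=-1))                 # [[ 50  50] [120  80]]: the 0.6 corner is suppressed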
metrics/get_metric.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import pickle
4
+ import cv2
5
+ from metrics.new_utils import *
6
+
7
+
8
+ class Metric():
9
+ def calc(self, gt_data, conv_data, thresh=8.0, iou_thresh=0.7):
10
+ ### compute corners precision/recall
11
+ gts = gt_data['corners']
12
+ dets = conv_data['corners']
13
+
14
+ per_sample_corner_tp = 0.0
15
+ per_sample_corner_fp = 0.0
16
+ per_sample_corner_length = gts.shape[0]
17
+ found = [False] * gts.shape[0]
18
+ c_det_annot = {}
19
+
20
+
21
+ # for each corner detection
22
+ for i, det in enumerate(dets):
23
+ # get closest gt
24
+ near_gt = [0, 999999.0, (0.0, 0.0)]
25
+ for k, gt in enumerate(gts):
26
+ dist = np.linalg.norm(gt - det)
27
+ if dist < near_gt[1]:
28
+ near_gt = [k, dist, gt]
29
+ if near_gt[1] <= thresh and not found[near_gt[0]]:
30
+ per_sample_corner_tp += 1.0
31
+ found[near_gt[0]] = True
32
+ c_det_annot[i] = near_gt[0]
33
+ else:
34
+ per_sample_corner_fp += 1.0
35
+
36
+ per_corner_score = {
37
+ 'recall': per_sample_corner_tp / gts.shape[0],
38
+ 'precision': per_sample_corner_tp / (per_sample_corner_tp + per_sample_corner_fp + 1e-8)
39
+ }
40
+
41
+ ### compute edges precision/recall
42
+ per_sample_edge_tp = 0.0
43
+ per_sample_edge_fp = 0.0
44
+ edge_corner_annots = gt_data['edges']
45
+ per_sample_edge_length = edge_corner_annots.shape[0]
46
+
47
+ false_edge_ids = []
48
+ match_gt_ids = set()
49
+
50
+ for l, e_det in enumerate(conv_data['edges']):
51
+ c1, c2 = e_det
52
+
53
+ # check if corners are mapped
54
+ if (c1 not in c_det_annot.keys()) or (c2 not in c_det_annot.keys()):
55
+ per_sample_edge_fp += 1.0
56
+ false_edge_ids.append(l)
57
+ continue
58
+ # check hit
59
+ c1_prime = c_det_annot[c1]
60
+ c2_prime = c_det_annot[c2]
61
+ is_hit = False
62
+
63
+ for k, e_annot in enumerate(edge_corner_annots):
64
+ c3, c4 = e_annot
65
+ if ((c1_prime == c3) and (c2_prime == c4)) or ((c1_prime == c4) and (c2_prime == c3)):
66
+ is_hit = True
67
+ match_gt_ids.add(k)
68
+ break
69
+
70
+ # hit
71
+ if is_hit:
72
+ per_sample_edge_tp += 1.0
73
+ else:
74
+ per_sample_edge_fp += 1.0
75
+ false_edge_ids.append(l)
76
+
77
+ per_edge_score = {
78
+ 'recall': per_sample_edge_tp / edge_corner_annots.shape[0],
79
+ 'precision': per_sample_edge_tp / (per_sample_edge_tp + per_sample_edge_fp + 1e-8)
80
+ }
81
+
82
+ # computer regions precision/recall
83
+ conv_mask = render(corners=conv_data['corners'], edges=conv_data['edges'], render_pad=0, edge_linewidth=1)[0]
84
+ conv_mask = 1 - conv_mask
85
+ conv_mask = conv_mask.astype(np.uint8)
86
+ labels, region_mask = cv2.connectedComponents(conv_mask, connectivity=4)
87
+
88
+ #cv2.imwrite('mask-pred.png', region_mask.astype(np.uint8) * 20)
89
+
90
+ background_label = region_mask[0, 0]
91
+ all_conv_masks = []
92
+ for region_i in range(1, labels):
93
+ if region_i == background_label:
94
+ continue
95
+ the_region = region_mask == region_i
96
+ if the_region.sum() < 20:
97
+ continue
98
+ all_conv_masks.append(the_region)
99
+
100
+ gt_mask = render(corners=gt_data['corners'], edges=gt_data['edges'], render_pad=0, edge_linewidth=1)[0]
101
+ gt_mask = 1 - gt_mask
102
+ gt_mask = gt_mask.astype(np.uint8)
103
+ labels, region_mask = cv2.connectedComponents(gt_mask, connectivity=4)
104
+
105
+ #cv2.imwrite('mask-gt.png', region_mask.astype(np.uint8) * 20)
106
+
107
+ background_label = region_mask[0, 0]
108
+ all_gt_masks = []
109
+ for region_i in range(1, labels):
110
+ if region_i == background_label:
111
+ continue
112
+ the_region = region_mask == region_i
113
+ if the_region.sum() < 20:
114
+ continue
115
+ all_gt_masks.append(the_region)
116
+
117
+ per_sample_region_tp = 0.0
118
+ per_sample_region_fp = 0.0
119
+ per_sample_region_length = len(all_gt_masks)
120
+ found = [False] * len(all_gt_masks)
121
+ for i, r_det in enumerate(all_conv_masks):
122
+ # gt closest gt
123
+ near_gt = [0, 0, None]
124
+ for k, r_gt in enumerate(all_gt_masks):
125
+ iou = np.logical_and(r_gt, r_det).sum() / float(np.logical_or(r_gt, r_det).sum())
126
+ if iou > near_gt[1]:
127
+ near_gt = [k, iou, r_gt]
128
+ if near_gt[1] >= iou_thresh and not found[near_gt[0]]:
129
+ per_sample_region_tp += 1.0
130
+ found[near_gt[0]] = True
131
+ else:
132
+ per_sample_region_fp += 1.0
133
+
134
+ per_region_score = {
135
+ 'recall': per_sample_region_tp / len(all_gt_masks),
136
+ 'precision': per_sample_region_tp / (per_sample_region_tp + per_sample_region_fp + 1e-8)
137
+ }
138
+
139
+ return {
140
+ 'corner_tp': per_sample_corner_tp,
141
+ 'corner_fp': per_sample_corner_fp,
142
+ 'corner_length': per_sample_corner_length,
143
+ 'edge_tp': per_sample_edge_tp,
144
+ 'edge_fp': per_sample_edge_fp,
145
+ 'edge_length': per_sample_edge_length,
146
+ 'region_tp': per_sample_region_tp,
147
+ 'region_fp': per_sample_region_fp,
148
+ 'region_length': per_sample_region_length,
149
+ 'corner': per_corner_score,
150
+ 'edge': per_edge_score,
151
+ 'region': per_region_score
152
+ }
153
+
154
+
155
+ def compute_metrics(gt_data, pred_data):
156
+ metric = Metric()
157
+ score = metric.calc(gt_data, pred_data)
158
+ return score
159
+
160
+
161
+ def get_recall_and_precision(tp, fp, length):
162
+ recall = tp / (length + 1e-8)
163
+ precision = tp / (tp + fp + 1e-8)
164
+ return recall, precision
165
+
166
+
167
+ if __name__ == '__main__':
168
+ base_path = './'
169
+ gt_datapath = '../data/cities_dataset/annot'
170
+ metric = Metric()
171
+ corner_tp = 0.0
172
+ corner_fp = 0.0
173
+ corner_length = 0.0
174
+ edge_tp = 0.0
175
+ edge_fp = 0.0
176
+ edge_length = 0.0
177
+ region_tp = 0.0
178
+ region_fp = 0.0
179
+ region_length = 0.0
180
+ for file_name in os.listdir(base_path):
181
+ if len(file_name) < 10:
182
+ continue
183
+ f = open(os.path.join(base_path, file_name), 'rb')
184
+ gt_data = np.load(os.path.join(gt_datapath, file_name + '.npy'), allow_pickle=True).tolist()
185
+ candidate = pickle.load(f)
+ f.close()
186
+ conv_corners = candidate.graph.getCornersArray()
187
+ conv_edges = candidate.graph.getEdgesArray()
188
+ conv_data = {'corners': conv_corners, 'edges': conv_edges}
189
+ score = metric.calc(gt_data, conv_data)
190
+ corner_tp += score['corner_tp']
191
+ corner_fp += score['corner_fp']
192
+ corner_length += score['corner_length']
193
+ edge_tp += score['edge_tp']
194
+ edge_fp += score['edge_fp']
195
+ edge_length += score['edge_length']
196
+ region_tp += score['region_tp']
197
+ region_fp += score['region_fp']
198
+ region_length += score['region_length']
199
+
200
+ f = open(os.path.join(base_path, 'score.txt'), 'w')
201
+ # corner
202
+ recall, precision = get_recall_and_precision(corner_tp, corner_fp, corner_length)
203
+ f_score = 2.0 * precision * recall / (recall + precision + 1e-8)
204
+ print('corners - precision: %.3f recall: %.3f f_score: %.3f' % (precision, recall, f_score))
205
+ f.write('corners - precision: %.3f recall: %.3f f_score: %.3f\n' % (precision, recall, f_score))
206
+
207
+ # edge
208
+ recall, precision = get_recall_and_precision(edge_tp, edge_fp, edge_length)
209
+ f_score = 2.0 * precision * recall / (recall + precision + 1e-8)
210
+ print('edges - precision: %.3f recall: %.3f f_score: %.3f' % (precision, recall, f_score))
211
+ f.write('edges - precision: %.3f recall: %.3f f_score: %.3f\n' % (precision, recall, f_score))
212
+
213
+ # region
214
+ recall, precision = get_recall_and_precision(region_tp, region_fp, region_length)
215
+ f_score = 2.0 * precision * recall / (recall + precision + 1e-8)
216
+ print('regions - precision: %.3f recall: %.3f f_score: %.3f' % (precision, recall, f_score))
217
+ f.write('regions - precision: %.3f recall: %.3f f_score: %.3f\n' % (precision, recall, f_score))
218
+
219
+ f.close()
metrics/new_utils.py ADDED
@@ -0,0 +1,2100 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ import cv2
4
+ import threading
5
+ import os
6
+ import skimage
7
+ import random
8
+ import time
9
+
10
+ TWO_CORNER_MINIMUM_DISTANCE = 5
11
+ SAFE_NUM = 3
12
+ score_weights = (1., 2., 100.)
13
+
14
+
15
+ #########################################################################################
16
+ ################################# General Functions #####################################
17
+ #########################################################################################
18
+ def swap_two_corner_place(corners, edges, id1, id2):
19
+ for edge_i in range(edges.shape[0]):
20
+ if edges[edge_i, 0] == id1:
21
+ edges[edge_i, 0] = id2
22
+ elif edges[edge_i, 0] == id2:
23
+ edges[edge_i, 0] = id1
24
+ if edges[edge_i, 1] == id1:
25
+ edges[edge_i, 1] = id2
26
+ elif edges[edge_i, 1] == id2:
27
+ edges[edge_i, 1] = id1
28
+ temp = corners[id1].copy()
29
+ corners[id1] = corners[id2]
30
+ corners[id2] = temp
31
+ return corners, edges
32
+
33
+
34
+ def get_neighbor_corner_id(corner_id, edges):
35
+ where = np.where(edges == corner_id)
36
+ return edges[where[0], 1 - where[1]]
37
+
38
+
39
+ def swap_two_edge_place(edges, id1, id2):
40
+ temp = edges[id1].copy()
41
+ edges[id1] = edges[id2]
42
+ edges[id2] = temp
43
+ return edges
44
+
45
+
46
+ def degree_of_three_corners(cornerA, cornerB, cornerM):
47
+ # cornerM is middle corner
48
+ AM_length = l2_distance(cornerA, cornerM)
49
+ BM_length = l2_distance(cornerB, cornerM)
50
+ dot = np.dot((cornerA[0] - cornerM[0], cornerA[1] - cornerM[1]),
51
+ (cornerB[0] - cornerM[0], cornerB[1] - cornerM[1]))
52
+ cos = dot / (AM_length + 1e-8) / (BM_length + 1e-8)
53
+ cos = min(1, max(-1, cos))
54
+ degree = np.arccos(cos)
55
+ return degree / np.pi * 180
56
+
57
+
58
+ def sort_graph(corners, edges):
59
+ corners = corners.copy()
60
+ edges = edges.copy()
61
+ for corner_i in range(corners.shape[0]):
62
+ min_id = -1
63
+ min_pos = corners[corner_i]
64
+ for corner_j in range(corner_i + 1, corners.shape[0]):
65
+ if (corners[corner_j, 0] < min_pos[0]) or \
66
+ (corners[corner_j, 0] == min_pos[0] and corners[corner_j, 1] < min_pos[1]):
67
+ min_pos = corners[corner_j]
68
+ min_id = corner_j
69
+ if min_id != -1:
70
+ corners, edges = swap_two_corner_place(corners, edges, corner_i, min_id)
71
+
72
+ for edge_i in range(edges.shape[0]):
73
+ if edges[edge_i, 0] > edges[edge_i, 1]:
74
+ temp = edges[edge_i, 0]
75
+ edges[edge_i, 0] = edges[edge_i, 1]
76
+ edges[edge_i, 1] = temp
77
+
78
+ for edge_i in range(edges.shape[0]):
79
+ min_id = -1
80
+ min_pos = edges[edge_i]
81
+ for edge_j in range(edge_i + 1, edges.shape[0]):
82
+ if (edges[edge_j, 0] < min_pos[0]) or \
83
+ (edges[edge_j, 0] == min_pos[0] and edges[edge_j, 1] < min_pos[1]):
84
+ min_pos = edges[edge_j]
85
+ min_id = edge_j
86
+ if min_id != -1:
87
+ edges = swap_two_edge_place(edges, edge_i, min_id)
88
+
89
+ return corners, edges
90
+
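sort_graph canonicalizes a graph (corners lexicographically, each edge's endpoints ordered, then edges lexicographically) so that two equivalent graphs compare equal element by element. The selection sort above is O(n^2); a vectorized equivalent, offered only as a sketch and not part of the original file, could look like:

    import numpy as np

    def sort_graph_np(corners, edges):
        # Lexsort corners by (row, col), remap edge indices accordingly,
        # order each edge pair, then lexsort the edge list.
        order = np.lexsort((corners[:, 1], corners[:, 0]))
        remap = np.empty(len(corners), dtype=int)
        remap[order] = np.arange(len(corners))
        corners = corners[order]
        edges = np.sort(remap[edges], axis=1)
        edges = edges[np.lexsort((edges[:, 1], edges[:, 0]))]
        return corners, edges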
91
+
92
+ def IOU(maskA, maskB):
93
+ return np.logical_and(maskA, maskB).sum() / np.logical_or(maskA, maskB).sum()
94
+
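A quick sanity check for IOU (note it assumes the union is non-empty, otherwise the division is 0/0):

    import numpy as np

    a = np.zeros((4, 4), dtype=bool); a[:2] = True   # 8 pixels
    b = np.zeros((4, 4), dtype=bool); b[1:3] = True  # 8 pixels, 4 shared
    print(IOU(a, b))  # 4 / 12 = 0.333...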
95
+
96
+ def render(corners, edges, render_pad=0, edge_linewidth=2, corner_size=3, scale=1.):
97
+ size = int(256 * scale)
98
+ mask = np.ones((2, size, size)) * render_pad
99
+
100
+ corners = np.round(corners.copy() * scale).astype(int)  # np.int was removed in NumPy 1.24
101
+ for edge_i in range(edges.shape[0]):
102
+ a = edges[edge_i, 0]
103
+ b = edges[edge_i, 1]
104
+ mask[0] = cv2.line(mask[0], (int(corners[a, 1]), int(corners[a, 0])),
105
+ (int(corners[b, 1]), int(corners[b, 0])), 1.0, thickness=edge_linewidth)
106
+ for corner_i in range(corners.shape[0]):
107
+ mask[1] = cv2.circle(mask[1], (int(corners[corner_i, 1]), int(corners[corner_i, 0])), corner_size, 1.0, -1)
108
+
109
+ return mask
110
+
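render rasterizes a graph into a 2-channel mask (channel 0: edges, channel 1: corners) on a 256x256 canvas; note that coordinates are (y, x), which is why they are flipped before the cv2 calls. A usage sketch with a made-up square:

    import numpy as np

    corners = np.array([[60, 60], [60, 180], [180, 180], [180, 60]])  # (y, x)
    edges = np.array([[0, 1], [1, 2], [2, 3], [3, 0]])
    mask = render(corners, edges)          # shape (2, 256, 256)
    edge_map, corner_map = mask[0], mask[1]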
111
+
112
+ def patch_samples(edge_num, batch_size):
113
+ num = edge_num // batch_size
114
+ patchs = []
115
+ for i in range(num):
116
+ patchs.append([i * batch_size + j for j in range(batch_size)])
117
+
118
+ if edge_num % batch_size != 0:
119
+ patchs.append([j for j in range(batch_size * num, edge_num)])
120
+
121
+ return patchs
122
+
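patch_samples simply chunks range(edge_num) into batches, with a shorter final batch for the remainder:

    print(patch_samples(7, 3))  # [[0, 1, 2], [3, 4, 5], [6]]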
123
+
124
+ def l2_distance(x1, x2):
125
+ return np.sqrt((x1[0] - x2[0]) ** 2 + (x1[1] - x2[1]) ** 2)
126
+
127
+
128
+ def triangle_region(A, B, C):
129
+ l1 = np.linalg.norm(np.array(A) - np.array(B))
130
+ l2 = np.linalg.norm(np.array(A) - np.array(C))
131
+ l3 = np.linalg.norm(np.array(B) - np.array(C))
132
+ p = (l1 + l2 + l3) / 2
133
+ area = np.sqrt(np.abs(p * (p - l1) * (p - l2) * (p - l3)))
134
+ return area
135
+
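triangle_region is Heron's formula (the np.abs guards against tiny negative values from floating-point error on near-degenerate triangles). A worked check with a 3-4-5 right triangle:

    print(triangle_region((0, 0), (3, 0), (3, 4)))  # semi-perimeter 6 -> area 6.0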
136
+
137
+ def remove_intersection_and_duplicate(corners, edges, name):
138
+ over_all_flag = False
139
+ ori_corners = corners.copy()
140
+ ori_edges = edges.copy()
141
+ while True:
142
+ flag = False
143
+ for edge_i in range(edges.shape[0]):
144
+ for edge_j in range(edge_i + 1, edges.shape[0]):
145
+ corner11 = corners[edges[edge_i, 0]]
146
+ corner12 = corners[edges[edge_i, 1]]
147
+ corner21 = corners[edges[edge_j, 0]]
148
+ corner22 = corners[edges[edge_j, 1]]
149
+
150
+ y1 = corner11[0]
151
+ x1 = corner11[1]
152
+ y2 = corner12[0]
153
+ x2 = corner12[1]
154
+ a1 = y1 - y2
155
+ b1 = x2 - x1
156
+ c1 = x1 * y2 - x2 * y1
157
+ flag1 = (a1 * corner21[1] + b1 * corner21[0] + c1) * (a1 * corner22[1] + b1 * corner22[0] + c1)
158
+
159
+ y1 = corner21[0]
160
+ x1 = corner21[1]
161
+ y2 = corner22[0]
162
+ x2 = corner22[1]
163
+ a2 = y1 - y2
164
+ b2 = x2 - x1
165
+ c2 = x1 * y2 - x2 * y1
166
+ flag2 = (a2 * corner11[1] + b2 * corner11[0] + c2) * (a2 * corner12[1] + b2 * corner12[0] + c2)
167
+
168
+ if flag1 < -1e-5 and flag2 < -1e-5:
169
+ # intersection!
170
+ over_all_flag = True
171
+ flag = True
172
+
173
+ new_x = (c2 * b1 - c1 * b2) / (a1 * b2 - a2 * b1)
174
+ new_y = (a2 * c1 - a1 * c2) / (a1 * b2 - a2 * b1)
175
+
176
+ temp_d = 3
177
+ temp_id = -1
178
+ if l2_distance((new_y, new_x), corner11) < temp_d:
179
+ temp_id = edges[edge_i, 0]
180
+ temp_d = l2_distance((new_y, new_x), corner11)
181
+ if l2_distance((new_y, new_x), corner12) < temp_d:
182
+ temp_id = edges[edge_i, 1]
183
+ temp_d = l2_distance((new_y, new_x), corner12)
184
+ if l2_distance((new_y, new_x), corner21) < temp_d:
185
+ temp_id = edges[edge_j, 0]
186
+ temp_d = l2_distance((new_y, new_x), corner21)
187
+ if l2_distance((new_y, new_x), corner22) < temp_d:
188
+ temp_id = edges[edge_j, 1]
189
+ temp_d = l2_distance((new_y, new_x), corner22)
190
+ if temp_id != -1:
191
+ if edges[edge_i, 0] != temp_id and edges[edge_i, 1] != temp_id:
192
+ tt = edges[edge_i, 0]
193
+ edges[edge_i, 0] = temp_id
194
+ edges = np.append(edges, np.array([(temp_id, tt)]), 0)
195
+ if edges[edge_j, 0] != temp_id and edges[edge_j, 1] != temp_id:
196
+ tt = edges[edge_j, 0]
197
+ edges[edge_j, 0] = temp_id
198
+ edges = np.append(edges, np.array([(temp_id, tt)]), 0)
199
+ else:
200
+ corners = np.append(corners, np.array([(new_y, new_x)]), 0)
201
+ edge_id1 = edges[edge_i, 1]
202
+ edge_id2 = edges[edge_j, 1]
203
+ edges[edge_i, 1] = corners.shape[0] - 1
204
+ edges[edge_j, 1] = corners.shape[0] - 1
205
+ edges = np.append(edges, np.array([(edge_id1, corners.shape[0] - 1)]), 0)
206
+ edges = np.append(edges, np.array([(edge_id2, corners.shape[0] - 1)]), 0)
207
+ break
208
+ if flag:
209
+ break
210
+ if flag:
211
+ continue
212
+ break
213
+
214
+ # remove duplicate corners and zero-degree corners
215
+ graph = Graph(np.round(corners), edges)
216
+ for corner_i in reversed(range(len(graph.getCorners()))):
217
+ corner_ele1 = graph.getCorners()[corner_i]
218
+ for corner_j in reversed(range(corner_i)):
219
+ corner_ele2 = graph.getCorners()[corner_j]
220
+ if l2_distance(corner_ele1.x, corner_ele2.x) < 3:
221
+ connected_edge = graph.getEdgeConnected(corner_ele1)
222
+ for edge_ele in connected_edge:
223
+ if edge_ele.x[0] == corner_ele1:
224
+ another = edge_ele.x[1]
225
+ else:
226
+ another = edge_ele.x[0]
227
+ if another == corner_ele2:
228
+ graph.remove(edge_ele)
229
+ edge_ele.x = (another, corner_ele2)
230
+ graph.remove(corner_ele1)
231
+ for corner_ele in graph.getCorners():
232
+ if graph.getCornerDegree(corner_ele) == 0:
233
+ graph.remove(corner_ele)
234
+
235
+ corners = graph.getCornersArray()
236
+ edges = graph.getEdgesArray()
237
+ # if over_all_flag:
238
+ # plt.subplot(121)
239
+ # ori = render(ori_corners, ori_edges, edge_linewidth=1, corner_size=1)
240
+ # temp = np.concatenate((ori.transpose((1,2,0)), np.zeros((ori.shape[1],ori.shape[2],1))),2)
241
+ # plt.imshow(temp)
242
+ # plt.subplot(122)
243
+ # new_ = render(corners, edges, edge_linewidth=1, corner_size=1)
244
+ # temp = np.concatenate((new_.transpose((1,2,0)), np.zeros((new_.shape[1],new_.shape[2],1))),2)
245
+ # plt.imshow(temp)
246
+ # plt.show()
247
+
248
+ return corners, edges
249
+
250
+
251
+ def get_two_edge_intersection_location(corner11, corner12, corner21, corner22):
252
+ y1 = corner11[0]
253
+ x1 = corner11[1]
254
+ y2 = corner12[0]
255
+ x2 = corner12[1]
256
+ a1 = y1 - y2
257
+ b1 = x2 - x1
258
+ c1 = x1 * y2 - x2 * y1
259
+
260
+ y1 = corner21[0]
261
+ x1 = corner21[1]
262
+ y2 = corner22[0]
263
+ x2 = corner22[1]
264
+ a2 = y1 - y2
265
+ b2 = x2 - x1
266
+ c2 = x1 * y2 - x2 * y1
267
+
268
+ l = a1 * b2 - a2 * b1
269
+ if l == 0:
270
+ l = 1e-5
271
+
272
+ new_x = (c2 * b1 - c1 * b2) / l
273
+ new_y = (a2 * c1 - a1 * c2) / l
274
+
275
+ return round(new_y), round(new_x)
276
+
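get_two_edge_intersection_location puts each supporting line into implicit form a*x + b*y + c = 0 (with the file's (y, x) point order) and solves the 2x2 system by Cramer's rule; near-parallel lines are handled by clamping the denominator to 1e-5 rather than raising. A quick check:

    # The diagonals of a 10x10 box cross in the middle.
    print(get_two_edge_intersection_location((0, 0), (10, 10), (0, 10), (10, 0)))  # (5, 5)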
277
+
278
+ def get_distance_of_corner_and_edge(corner1, corner2, corner):
279
+ x = corner[0]
280
+ y = corner[1]
281
+ x1 = corner1[0]
282
+ y1 = corner1[1]
283
+ x2 = corner2[0]
284
+ y2 = corner2[1]
285
+
286
+ cross = (x2 - x1) * (x - x1) + (y2 - y1) * (y - y1)
287
+ if cross <= 0:
288
+ # dist to corner1
289
+ return np.linalg.norm((x - x1, y - y1))
290
+
291
+ d2 = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)
292
+ if cross >= d2:
293
+ # dist to corner2
294
+ return np.linalg.norm((x - x2, y - y2))
295
+
296
+ r = cross / d2
297
+ px = x1 + (x2 - x1) * r
298
+ py = y1 + (y2 - y1) * r
299
+ return np.linalg.norm((x - px, y - py))
300
+
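get_distance_of_corner_and_edge is the standard point-to-segment distance: project the point onto the segment's line and clamp the projection to the endpoints. Two quick checks:

    print(get_distance_of_corner_and_edge((0, 0), (10, 0), (5, 3)))   # 3.0, projection falls inside
    print(get_distance_of_corner_and_edge((0, 0), (10, 0), (-4, 3)))  # 5.0, clamped to an endpoint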
301
+
302
+ #########################################################################################
303
+ ################################# Dataset Functions #####################################
304
+ #########################################################################################
305
+ def EuclideanDistance(A, B):
306
+ BT = B.transpose()
307
+ vecProd = np.dot(A, BT)
308
+
309
+ SqA = A ** 2
310
+ sumSqA = np.sum(SqA, axis=1, keepdims=True)  # np.matrix is deprecated; keep everything as ndarray
311
+ sumSqAEx = np.tile(sumSqA, (1, vecProd.shape[1]))
312
+
313
+ SqB = B ** 2
314
+ sumSqB = np.sum(SqB, axis=1)
315
+ sumSqBEx = np.tile(sumSqB, (vecProd.shape[0], 1))
316
+ SqED = sumSqBEx + sumSqAEx - 2 * vecProd
317
+ SqED[SqED < 0] = 0.0
318
+ ED = np.sqrt(SqED)
319
+ return ED
320
+
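EuclideanDistance builds the full pairwise distance matrix from the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b; clamping SqED at zero absorbs floating-point error. It can be cross-checked against a plain broadcast form:

    import numpy as np

    A = np.array([[0.0, 0.0], [3.0, 4.0]])
    B = np.array([[0.0, 0.0], [6.0, 8.0]])
    D = EuclideanDistance(A, B)
    D_ref = np.linalg.norm(A[:, None, :] - B[None, :, :], axis=-1)
    assert np.allclose(D, D_ref)  # [[0, 10], [5, 5]]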
321
+
322
+ def samedirection(conv_corner_id, gt_corner_id, conv_corners, gt_corners, conv_edges, gt_edges):
323
+ # degree
324
+ if np.where(conv_edges == conv_corner_id)[0].shape[0] != np.where(gt_edges == gt_corner_id)[0].shape[0]:
325
+ return False
326
+
327
+ # direction
328
+ place = np.where(conv_edges == conv_corner_id)
329
+ neighbor_id = conv_edges[place[0], 1 - place[1]]
330
+
331
+ distance = conv_corners[conv_corner_id] - conv_corners[neighbor_id]
332
+ direction = np.arctan2(distance[:, 0], distance[:, 1]) * 180 / np.pi / 15
333
+ direction = (direction + 24) % 24
334
+
335
+ conv_dir = np.sort(direction)
336
+
337
+ place = np.where(gt_edges == gt_corner_id)
338
+ neighbor_id = gt_edges[place[0], 1 - place[1]]
339
+
340
+ distance = gt_corners[gt_corner_id] - gt_corners[neighbor_id]
341
+ direction = np.arctan2(distance[:, 0], distance[:, 1]) * 180 / np.pi / 15
342
+ direction = (direction + 24) % 24
343
+
344
+ gt_dir = np.sort(direction)
345
+
346
+ conv_dir = list(conv_dir)
347
+ gt_dir = list(gt_dir)
348
+ for angle in gt_dir:
349
+ temp = sorted(conv_dir, key=lambda x: min(np.abs(x - angle), 24 - np.abs(x - angle)))
350
+ if min(np.abs(temp[0] - angle), 24 - np.abs(temp[0] - angle)) <= 1.3:
351
+ conv_dir.remove(temp[0])
352
+ else:
353
+ return False
354
+ return True
355
+
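samedirection first compares corner degrees and then the multiset of incident edge directions, quantized into 24 bins of 15 degrees; the min(|a-b|, 24-|a-b|) term is the circular distance on that ring, so bins 23.5 and 0.5 count as 1 bin apart. Isolated for clarity:

    def ring_dist(a, b, n_bins=24):
        d = abs(a - b)
        return min(d, n_bins - d)

    print(ring_dist(23.5, 0.5))  # 1.0, not 23.0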
356
+
357
+ def simplify_gt(gt_match_location, gt_corner, gt_edge):
358
+ graph = Graph(np.round(gt_corner), gt_edge)
359
+ for idx, corner in enumerate(graph.getCorners()):
360
+ # use score to store the matching info
361
+ corner.store_score(gt_match_location[idx])
362
+
363
+ for idx, corner in enumerate(graph.getCorners()):
364
+ if corner.get_score() is None:
365
+ connected_edges = graph.getEdgeConnected(corner)
366
+ neighbor_corners = []
367
+ for edge in connected_edges:
368
+ if edge.x[0] != corner:
369
+ neighbor_corners.append(edge.x[0])
370
+ continue
371
+ if edge.x[1] != corner:
372
+ neighbor_corners.append(edge.x[1])
373
+ continue
374
+ raise BaseException()
375
+ neighbor_corners = sorted(neighbor_corners, key=lambda ele: l2_distance(ele.x, corner.x))
376
+ for neighbor_ele in neighbor_corners:
377
+ if l2_distance(neighbor_ele.x, corner.x) > 8:
378
+ break
379
+ if neighbor_ele.get_score() is None:
380
+ continue
381
+ # found a suitable matched neighbor; merge this corner into it
382
+ for ele in neighbor_corners:
383
+ if ele == neighbor_ele:
384
+ continue
385
+ graph.add_edge(ele, neighbor_ele)
386
+ neighbor_ele.x = (0.7 * neighbor_ele.x[0] + 0.3 * corner.x[0],
387
+ 0.7 * neighbor_ele.x[1] + 0.3 * corner.x[1])
388
+ graph.remove(corner)
389
+ break
390
+ return graph.getCornersArray(), graph.getEdgesArray()
391
+
392
+
393
+ def get_wrong_corners(corners, gt_corners, edges, gt_edges):
394
+ corners = corners.copy()
395
+ gt_corners = gt_corners.copy()
396
+ edges = edges.copy()
397
+ gt_edges = gt_edges.copy()
398
+ dist_matrix = EuclideanDistance(gt_corners, corners)
399
+ assigned_id = set()
400
+ gt_match_same_degree = []
401
+ gt_match_location = []
402
+ for gt_i in range(gt_corners.shape[0]):
403
+ sort_id = np.argsort(dist_matrix[gt_i])  # dist_matrix is a plain ndarray
404
+ flag = True
405
+ for id_ in sort_id:
406
+ if dist_matrix[gt_i, id_] > 7:
407
+ break
408
+ same_dir = samedirection(id_, gt_i, corners, gt_corners, edges, gt_edges)
409
+ if not same_dir:
410
+ break
411
+ elif id_ not in assigned_id:
412
+ assigned_id.add(id_)
413
+ gt_match_same_degree.append(id_)
414
+ flag = False
415
+ break
416
+ if flag:
417
+ gt_match_same_degree.append(None)
418
+
419
+ matched = []
420
+ gt_match_location = [None for _ in range(gt_corners.shape[0])]
421
+ for gt_i in sorted(list(range(gt_corners.shape[0])), key=lambda i: np.min(dist_matrix[i])):
422
+ sort_id = np.argsort(dist_matrix[gt_i])
423
+ if dist_matrix[gt_i, sort_id[0]] > 7:
424
+ gt_match_location[gt_i] = None
425
+ else:
426
+ for c_i in sort_id:
427
+ if c_i in matched:
428
+ continue
429
+ if dist_matrix[gt_i, c_i] > 7:
430
+ gt_match_location[gt_i] = None
431
+ break
432
+ else:
433
+ gt_match_location[gt_i] = c_i
434
+ matched.append(c_i)
435
+ break
436
+
437
+ return set(range(corners.shape[0])) - assigned_id, gt_match_same_degree, gt_match_location
438
+
439
+
440
+ def get_wrong_edges(corners, gt_corners, edges, gt_edges, gt_match):
441
+ edges = edges.copy()
442
+ gt_edges = gt_edges.copy()
443
+
444
+ all_possible_good_edges = []
445
+ for edge_i in range(gt_edges.shape[0]):
446
+ if gt_match[gt_edges[edge_i, 0]] is None or gt_match[gt_edges[edge_i, 1]] is None:
447
+ continue
448
+ all_possible_good_edges.append((gt_match[gt_edges[edge_i, 0]], gt_match[gt_edges[edge_i, 1]]))
449
+ false_edge_id = []
450
+ for edge_i in range(edges.shape[0]):
451
+ id1 = edges[edge_i][0]
452
+ id2 = edges[edge_i][1]
453
+ if (id1, id2) not in all_possible_good_edges and (id2, id1) not in all_possible_good_edges:
454
+ false_edge_id.append(edge_i)
455
+ continue
456
+
457
+ return false_edge_id
458
+
459
+
460
+ def get_corner_bin_map(corners, corner_list_for_each_bin, bin_size=10):
461
+ bin_map = np.zeros((bin_size, 256, 256))
462
+ for bin_i in range(bin_size):
463
+ bin_map[bin_i] = render(corners[corner_list_for_each_bin[bin_i]], np.array([]), render_pad=0)[1]
464
+ return bin_map
465
+
466
+
467
+ #########################################################################################
468
+ ################################ Searching Functions ####################################
469
+ #########################################################################################
470
+ def visualization(candidate, show=True):
471
+ corners = candidate.graph.getCornersArray()
472
+ edges = candidate.graph.getEdgesArray()
473
+ mask = render(corners, edges)
474
+ mask = np.transpose(np.concatenate((mask, np.zeros((1, 256, 256))), 0), (1, 2, 0))
475
+ plt.imshow(mask)
476
+ if show:
477
+ plt.show()
478
+
479
+
480
+ def check_intersection(edge1, edge2):
481
+ corner11 = edge1.x[0].x
482
+ corner12 = edge1.x[1].x
483
+ corner21 = edge2.x[0].x
484
+ corner22 = edge2.x[1].x
485
+
486
+ y1 = corner11[0]
487
+ x1 = corner11[1]
488
+ y2 = corner12[0]
489
+ x2 = corner12[1]
490
+ a = y1 - y2
491
+ b = x2 - x1
492
+ c = x1 * y2 - x2 * y1
493
+ flag1 = (a * corner21[1] + b * corner21[0] + c) * (a * corner22[1] + b * corner22[0] + c)
494
+
495
+ y1 = corner21[0]
496
+ x1 = corner21[1]
497
+ y2 = corner22[0]
498
+ x2 = corner22[1]
499
+ a = y1 - y2
500
+ b = x2 - x1
501
+ c = x1 * y2 - x2 * y1
502
+ flag2 = (a * corner11[1] + b * corner11[0] + c) * (a * corner12[1] + b * corner12[0] + c)
503
+
504
+ if flag1 < -1e-6 and flag2 < -1e-6:
505
+ return True
506
+
507
+ return False
508
+
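check_intersection reports a proper crossing: both flag products are negative exactly when each segment's endpoints straddle the other's supporting line, and the -1e-6 slack excludes touching or collinear cases. A small check using the Element wrapper defined later in this file:

    e1 = Element((Element((0, 0)), Element((10, 10))))
    e2 = Element((Element((0, 10)), Element((10, 0))))
    print(check_intersection(e1, e2))  # True: segments cross at (5, 5)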
509
+
510
+ def adding_a_corner_by_triangle_operation(candidate):
511
+ new_candidates = []
512
+ name = candidate.name
513
+ gt_mask = region_cache.get_region(name)
514
+ gt_mask = gt_mask > 0.4
515
+ gt_mask_grow = cv2.dilate(gt_mask.astype(np.float64), np.ones((3, 3), np.uint8), iterations=6) > 0
516
+
517
+ # get the current candidate region mask
518
+ conv_mask = render(corners=candidate.graph.getCornersArray(), edges=candidate.graph.getEdgesArray(),
519
+ render_pad=0, edge_linewidth=1)[0]
520
+ conv_mask = 1 - conv_mask
521
+ conv_mask = conv_mask.astype(np.uint8)
522
+ labels, region_mask = cv2.connectedComponents(conv_mask, connectivity=4)
523
+
524
+ background_label = region_mask[0, 0]
525
+ all_masks = []
526
+ for region_i in range(1, labels):
527
+ if region_i == background_label:
528
+ continue
529
+ the_region = region_mask == region_i
530
+ if the_region.sum() < 20:
531
+ continue
532
+ all_masks.append(the_region)
533
+
534
+ candidate_mask = (np.sum(all_masks, 0) + (1 - conv_mask)) > 0
535
+
536
+ final_mask = np.logical_xor(gt_mask_grow, np.logical_and(candidate_mask, gt_mask_grow))
537
+
538
+ for corner_i in range(random.randint(0, 16), 256, 16):
539
+ for corner_j in range(random.randint(0, 16), 256, 16):
540
+ if candidate.addable((corner_i, corner_j)):
541
+ if final_mask[corner_i, corner_j]:  # inside the region
542
+ new_corner = Element((corner_i, corner_j))
543
+ new_candidate = candidate.generate_new_candidate_add_a_corner(new_corner)
544
+ new_graph = new_candidate.graph
545
+ corners = new_graph.getCorners()
546
+
547
+ # find two suitable existing corners to form a triangle (no intersections, no collinear edges)
548
+ for id_A in range(len(corners)):
549
+ ele_A = corners[id_A]
550
+ if ele_A == new_corner:
551
+ continue
552
+ for id_B in range(id_A + 1, len(corners)):
553
+ ele_B = corners[id_B]
554
+ if ele_B == new_corner:
555
+ continue
556
+ if new_graph.has_edge(new_corner, ele_A) is not None:
557
+ raise BaseException('should not have edge in this case')
558
+ if new_graph.has_edge(new_corner, ele_B) is not None:
559
+ raise BaseException('should not have edge in this case')
560
+ temp_edge1 = Element((new_corner, ele_A))
561
+ temp_edge2 = Element((new_corner, ele_B))
562
+
563
+ # check if addable
564
+ if new_candidate.addable(temp_edge1) is False:
565
+ continue
566
+ if new_candidate.addable(temp_edge2) is False:
567
+ continue
568
+
569
+ # avoid intersection
570
+ if new_graph.checkIntersectionEdge(temp_edge1):
571
+ continue
572
+ if new_graph.checkIntersectionEdge(temp_edge2):
573
+ continue
574
+
575
+ # avoid too small triangle
576
+ if triangle_region(new_corner.x, ele_A.x, ele_B.x) < 20:
577
+ continue
578
+
579
+ ### avoid collinear edges (fold case only)
580
+ # for edge1
581
+ neighbor_edges = new_graph.getEdgeConnected(temp_edge1)
582
+ flag_ = True
583
+ for neighbor in neighbor_edges:
584
+ if new_corner in neighbor.x:
585
+ raise BaseException('new corner should not in any edge')
586
+ elif ele_A in neighbor.x:
587
+ shared_corner = ele_A
588
+ else:
589
+ raise BaseException('error.')
590
+ two_neighbor = {neighbor.x[0], neighbor.x[1], ele_A, new_corner}
591
+ two_neighbor.remove(shared_corner)
592
+ assert len(two_neighbor) == 2
593
+ two_neighbor = tuple(two_neighbor)
594
+
595
+ line1 = np.array(shared_corner.x) - np.array(two_neighbor[0].x)
596
+ line2 = np.array(shared_corner.x) - np.array(two_neighbor[1].x)
597
+ cos = np.dot(line1, line2) / (np.linalg.norm(line1) * np.linalg.norm(line2))
598
+ cos = min(1, max(-1, cos))
599
+ if np.arccos(cos) < np.pi / 9: # 20 degree
600
+ flag_ = False
601
+ break
602
+ if flag_ is False:
603
+ continue
604
+ # for edge2
605
+ neighbor_edges = new_graph.getEdgeConnected(temp_edge2)
606
+ flag_ = True
607
+ for neighbor in neighbor_edges:
608
+ if new_corner in neighbor.x:
609
+ raise BaseException('new corner should not in any edge')
610
+ elif ele_B in neighbor.x:
611
+ shared_corner = ele_B
612
+ else:
613
+ raise BaseException('error.')
614
+ two_neighbor = {neighbor.x[0], neighbor.x[1], ele_B, new_corner}
615
+ two_neighbor.remove(shared_corner)
616
+ assert len(two_neighbor) == 2
617
+ two_neighbor = tuple(two_neighbor)
618
+
619
+ line1 = np.array(shared_corner.x) - np.array(two_neighbor[0].x)
620
+ line2 = np.array(shared_corner.x) - np.array(two_neighbor[1].x)
621
+ cos = np.dot(line1, line2) / (np.linalg.norm(line1) * np.linalg.norm(line2))
622
+ cos = min(1, max(-1, cos))
623
+ if np.arccos(cos) < np.pi / 9: # 20 degree
624
+ flag_ = False
625
+ break
626
+ if flag_ is False:
627
+ continue
628
+
629
+ # make new candidate
630
+ try:
631
+ new_ = new_candidate.generate_new_candidate_add_an_edge(new_corner, ele_A)
632
+ new_ = new_.generate_new_candidate_add_an_edge(new_corner, ele_B)
633
+ new_candidates.append(new_)
634
+ except Exception:
635
+ continue
636
+ # plt.subplot(151)
637
+ # visualization(candidate, show=False)
638
+ # plt.subplot(152)
639
+ # plt.imshow(final_mask)
640
+ # plt.subplot(153)
641
+ # plt.imshow(candidate_mask)
642
+ # plt.subplot(154)
643
+ # plt.imshow(gt_mask_grow)
644
+ # plt.subplot(155)
645
+ # visualization(new_, show=False)
646
+ # plt.show()
647
+
648
+ return new_candidates
649
+
650
+
651
+ def adding_an_edge_from_new_corner_operation(candidate):
652
+ new_candidates = []
653
+ name = candidate.name
654
+ gt_mask = region_cache.get_region(name)
655
+ gt_mask = gt_mask > 0.4
656
+ gt_mask_grow = cv2.dilate(gt_mask.astype(np.float64), np.ones((3, 3), np.uint8), iterations=6) > 0
657
+
658
+ # get the current candidate region mask
659
+ conv_mask = render(corners=candidate.graph.getCornersArray(), edges=candidate.graph.getEdgesArray(),
660
+ render_pad=0, edge_linewidth=1)[0]
661
+ conv_mask = 1 - conv_mask
662
+ conv_mask = conv_mask.astype(np.uint8)
663
+ labels, region_mask = cv2.connectedComponents(conv_mask, connectivity=4)
664
+ background_label = region_mask[0, 0]
665
+ all_masks = []
666
+ for region_i in range(1, labels):
667
+ if region_i == background_label:
668
+ continue
669
+ the_region = region_mask == region_i
670
+ if the_region.sum() < 20:
671
+ continue
672
+ all_masks.append(the_region)
673
+ candidate_mask = (np.sum(all_masks, 0) + (1 - conv_mask)) > 0
674
+
675
+ final_mask = np.logical_xor(gt_mask_grow, np.logical_and(candidate_mask, gt_mask_grow))
676
+ for corner_i in range(random.randint(0, 16), 256, 16):
677
+ for corner_j in range(random.randint(0, 16), 256, 16):
678
+ if candidate.addable((corner_i, corner_j)):
679
+ if final_mask[corner_i, corner_j]:
680
+ # inside the region
681
+ new_corner = Element((corner_i, corner_j))
682
+ new_candidate = candidate.generate_new_candidate_add_a_corner(new_corner)
683
+ new_graph = new_candidate.graph
684
+ corners = new_graph.getCorners()
685
+
686
+ # find a suitable existing corner that can form
687
+ # a new edge with new_corner (no intersection, no collinear edges)
688
+ for corner_ele in corners:
689
+ if corner_ele == new_corner:
690
+ continue
691
+ if new_graph.has_edge(new_corner, corner_ele) is not None:
692
+ raise BaseException('should not have edge in this case')
693
+ temp_edge = Element((new_corner, corner_ele))
694
+
695
+ # check if addable
696
+ if new_candidate.addable(temp_edge) is False:
697
+ continue
698
+
699
+ # avoid intersection
700
+ if new_graph.checkIntersectionEdge(temp_edge):
701
+ continue
702
+
703
+ # avoid collinear edges
704
+ neighbor_edges = new_graph.getEdgeConnected(temp_edge)
705
+ flag_ = True
706
+ for neighbor in neighbor_edges:
707
+ if new_corner in neighbor.x:
708
+ raise BaseException('new corner should not in any edge')
709
+ elif corner_ele in neighbor.x:
710
+ shared_corner = corner_ele
711
+ else:
712
+ raise BaseException('error.')
713
+ two_neighbor = {neighbor.x[0], neighbor.x[1], corner_ele, new_corner}
714
+ two_neighbor.remove(shared_corner)
715
+ assert len(two_neighbor) == 2
716
+ two_neighbor = tuple(two_neighbor)
717
+
718
+ line1 = np.array(shared_corner.x) - np.array(two_neighbor[0].x)
719
+ line2 = np.array(shared_corner.x) - np.array(two_neighbor[1].x)
720
+ cos = np.dot(line1, line2) / (np.linalg.norm(line1) * np.linalg.norm(line2))
721
+ cos = min(1, max(-1, cos))
722
+ if np.arccos(cos) < np.pi / 9: # 20 degree
723
+ flag_ = False
724
+ break
725
+ if flag_ is False:
726
+ continue
727
+
728
+ # make new candidate
729
+ try:
730
+ new_ = new_candidate.generate_new_candidate_add_an_edge(new_corner, corner_ele)
731
+ new_candidates.append(new_)
732
+ except Exception:
733
+ continue
734
+
735
+ return new_candidates
736
+
737
+
738
+ def removing_a_corner_operation(candidate):
739
+ new_candidates = []
740
+ graph = candidate.graph
741
+ corners = graph.getCorners()
742
+ for the_corner in corners:
743
+ if candidate.removable(the_corner):
744
+ try:
745
+ new_ = candidate.generate_new_candidate_remove_a_corner(the_corner)
746
+ new_candidates.append(new_)
747
+ except Exception:
748
+ continue
749
+
750
+ return new_candidates
751
+
752
+
753
+ def removing_a_colinear_corner_operation(candidate):
754
+ new_candidates = []
755
+ graph = candidate.graph
756
+ corners = graph.getCorners()
757
+ for the_corner in corners:
758
+ if candidate.removable(the_corner):  # no need to also check graph.checkColinearCorner(the_corner) here
759
+ try:
760
+ new_ = candidate.generate_new_candidate_remove_a_colinear_corner(the_corner)
761
+
762
+ if new_.graph.checkIntersectionEdge():
763
+ continue
764
+ new_candidates.append(new_)
765
+ except Exception:
766
+ continue
767
+
768
+ return new_candidates
769
+
770
+
771
+ def adding_an_edge_operation(candidate):
772
+ new_candidates = []
773
+ graph = candidate.graph
774
+ corners = graph.getCorners()
775
+ for corner_i in range(len(corners)):
776
+ cornerA = corners[corner_i]
777
+ for corner_j in range(corner_i + 1, len(corners)):
778
+ cornerB = corners[corner_j]
779
+ if graph.has_edge(cornerA, cornerB) is not None:
780
+ continue
781
+
782
+ temp_edge = Element((cornerA, cornerB))
783
+ # check if addable (not in existed_before dict)
784
+ if candidate.addable(temp_edge) is False:
785
+ continue
786
+
787
+ if graph.checkIntersectionEdge(temp_edge):
788
+ continue
789
+
790
+ # avoid adding a collinear edge
791
+ neighbor_edges = graph.getEdgeConnected(temp_edge)
792
+ flag_ = True
793
+ for neighbor in neighbor_edges:
794
+ if cornerA in neighbor.x:
795
+ shared_corner = cornerA
796
+ elif cornerB in neighbor.x:
797
+ shared_corner = cornerB
798
+ else:
799
+ raise BaseException('error.')
800
+ two_neighbor = {neighbor.x[0], neighbor.x[1], cornerA, cornerB}
801
+ two_neighbor.remove(shared_corner)
802
+ assert len(two_neighbor) == 2
803
+ two_neighbor = tuple(two_neighbor)
804
+
805
+ line1 = np.array(shared_corner.x) - np.array(two_neighbor[0].x)
806
+ line2 = np.array(two_neighbor[1].x) - np.array(shared_corner.x)
807
+ cos = np.dot(line1, line2) / (np.linalg.norm(line1) * np.linalg.norm(line2))
808
+ cos = min(1, max(-1, cos))
809
+ if np.arccos(cos) < np.pi / 18 or np.arccos(cos) > np.pi - np.pi / 18: # 10 degree
810
+ flag_ = False
811
+ break
812
+ if flag_ is False:
813
+ continue
814
+
815
+ # make new candidate
816
+ try:
817
+ new_ = candidate.generate_new_candidate_add_an_edge(cornerA, cornerB)
818
+ new_candidates.append(new_)
819
+ except Exception:
820
+ continue
821
+
822
+ return new_candidates
823
+
824
+
825
+ def removing_an_edge_operation(candidate):
826
+ new_candidates = []
827
+ graph = candidate.graph
828
+ edges = graph.getEdges()
829
+ for edge_ele in edges:
830
+ if candidate.removable(edge_ele):
831
+ try:
832
+ new_ = candidate.generate_new_candidate_remove_an_edge(edge_ele)
833
+ new_candidates.append(new_)
834
+ except Exception:
835
+ continue
836
+
837
+ return new_candidates
838
+
839
+
840
+ def adding_an_edge_from_gt(candidate, gt_data):
841
+ new_candidates = []
842
+ corners_array = candidate.graph.getCornersArray()
843
+ edges_array = candidate.graph.getEdgesArray()
844
+
845
+ gt_corners = gt_data['corners'].copy()
846
+ gt_edges = gt_data['edges'].copy()
847
+
848
+ _, _, map_same_location = get_wrong_corners(
849
+ corners_array, gt_corners, edges_array, gt_edges)
850
+
851
+ gt_corners, gt_edges = simplify_gt(map_same_location, gt_corners, gt_edges)
852
+
853
+ _, _, map_same_location = get_wrong_corners(
854
+ corners_array, gt_corners, edges_array, gt_edges)
855
+
856
+ for corner_i in range(gt_corners.shape[0]):
857
+ if map_same_location[corner_i] is None:
858
+ # doesn't exist in candidate
859
+ neighbor_id = get_neighbor_corner_id(corner_i, gt_edges)
860
+ for corner_j in neighbor_id:
861
+ if map_same_location[corner_j] is not None:
862
+ # exist corner in candidate that maps neighbor corner
863
+ new_candidate = candidate.copy()
864
+ new_corner = Element(
865
+ (
866
+ int(np.round(gt_corners[corner_i, 0])), int(np.round(gt_corners[corner_i, 1]))
867
+ )
868
+ )
869
+ if new_candidate.addable(new_corner) is False:
870
+ continue
871
+ # the new corner must not be too close to an existing edge
872
+ flag = False
873
+ for edge_ele in new_candidate.graph.getEdges():
874
+ if get_distance_of_corner_and_edge(edge_ele.x[0].x, edge_ele.x[1].x, new_corner.x) < 7:
875
+ flag = True
876
+ break
877
+ if flag:
878
+ continue
879
+
880
+ new_corner = new_candidate.addCorner(new_corner)
881
+ neighbor_index = map_same_location[corner_j]
882
+ neighbor_corner = new_candidate.graph.getCorners()[neighbor_index]
883
+ new_edge = new_candidate.addEdge(new_corner, neighbor_corner)
884
+ if new_candidate.graph.checkIntersectionEdge(new_edge):
885
+ continue
886
+ new_candidates.append(new_candidate)
887
+
888
+ return new_candidates
889
+
890
+
891
+ def adding_a_corner_from_two_edges_extension(candidate):
892
+ new_candidates = []
893
+ graph = candidate.graph
894
+ edges = candidate.graph.getEdges()
895
+ for edge_i in range(len(edges)):
896
+ for edge_j in range(edge_i + 1, len(edges)):
897
+ edgeA = edges[edge_i]
898
+ edgeB = edges[edge_j]
899
+ if graph.isNeighbor(edgeA, edgeB):
900
+ continue
901
+ intersection_loc = get_two_edge_intersection_location(edgeA.x[0].x, edgeA.x[1].x, edgeB.x[0].x,
902
+ edgeB.x[1].x)
903
+ if intersection_loc[0] >= 255 or intersection_loc[1] >= 255 or \
904
+ intersection_loc[0] <= 0 or intersection_loc[1] <= 0:
905
+ continue
906
+ # the intersection point cannot be too close to an existing edge
907
+ flag = False
908
+ for edge_ele in graph.getEdges():
909
+ if get_distance_of_corner_and_edge(edge_ele.x[0].x, edge_ele.x[1].x, intersection_loc) < 7:
910
+ flag = True
911
+ break
912
+ if flag:
913
+ continue
914
+ new_candidate = candidate.copy()
915
+ new_graph = new_candidate.graph
916
+ new_edgeA = new_graph.getRealElement(edgeA)
917
+ new_edgeB = new_graph.getRealElement(edgeB)
918
+ new_corner = Element(intersection_loc)
919
+ if new_candidate.addable(new_corner) is False:
920
+ continue
921
+ new_corner = new_candidate.addCorner_v2(new_corner)
922
+ # get cornerA and cornerB from edgeA, edgeB
923
+ if l2_distance(new_corner.x, new_edgeA.x[0].x) < l2_distance(new_corner.x, new_edgeA.x[1].x):
924
+ cornerA = new_edgeA.x[0]
925
+ else:
926
+ cornerA = new_edgeA.x[1]
927
+ if l2_distance(new_corner.x, new_edgeB.x[0].x) < l2_distance(new_corner.x, new_edgeB.x[1].x):
928
+ cornerB = new_edgeB.x[0]
929
+ else:
930
+ cornerB = new_edgeB.x[1]
931
+
932
+ # new edges cannot be too short
933
+ if l2_distance(cornerA.x, new_corner.x) < 7:
934
+ continue
935
+ if l2_distance(cornerB.x, new_corner.x) < 7:
936
+ continue
937
+
938
+ # new intersection cannot be too flat
939
+ if degree_of_three_corners(cornerA.x, cornerB.x, new_corner.x) > 165:
940
+ continue
941
+
942
+ flag = False
943
+ for edge_ele in new_graph.getEdges():
944
+ if new_corner in edge_ele.x and cornerA in edge_ele.x:
945
+ flag = True
946
+ break
947
+ if edge_ele.x[0] not in (new_corner, cornerA):
948
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerA.x, edge_ele.x[0].x)
949
+ if l <= 7:
950
+ flag = True
951
+ break
952
+ if edge_ele.x[1] not in (new_corner, cornerA):
953
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerA.x, edge_ele.x[1].x)
954
+ if l <= 7:
955
+ flag = True
956
+ break
957
+ if flag:
958
+ continue
959
+ add_edgeA = new_candidate.addEdge(new_corner, cornerA)
960
+ if new_graph.checkIntersectionEdge(add_edgeA):
961
+ continue
962
+
963
+ flag = False
964
+ for edge_ele in new_graph.getEdges():
965
+ if new_corner in edge_ele.x and cornerB in edge_ele.x:
966
+ flag = True
967
+ break
968
+ if edge_ele.x[0] not in (new_corner, cornerB):
969
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerB.x, edge_ele.x[0].x)
970
+ if l <= 7:
971
+ flag = True
972
+ break
973
+ if edge_ele.x[1] not in (new_corner, cornerB):
974
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerB.x, edge_ele.x[1].x)
975
+ if l <= 7:
976
+ flag = True
977
+ break
978
+ if flag:
979
+ continue
980
+ add_edgeB = new_candidate.addEdge(new_corner, cornerB)
981
+ if new_graph.checkIntersectionEdge(add_edgeB):
982
+ continue
983
+
984
+ # make real new candidate
985
+ # new_candidate = candidate.copy()
986
+ # new_graph = new_candidate.graph
987
+ # new_corner = Element(intersection_loc)
988
+ # new_corner = new_graph.add_corner_v2(new_corner)
989
+ # new_candidate = new_candidate.generate_new_candidate_add_an_edge(new_corner, cornerA)
990
+ # new_candidate = new_candidate.generate_new_candidate_add_an_edge(new_corner, cornerB)
991
+
992
+ new_candidates.append(new_candidate)
993
+ return new_candidates
994
+
995
+
996
+ def adding_a_corner_from_parallel(candidate):
997
+ new_candidates = []
998
+ graph = candidate.graph
999
+ edges = candidate.graph.getEdges()
1000
+ for edge_i in range(len(edges)):
1001
+ for edge_j in range(edge_i + 1, len(edges)):
1002
+ edgeA = edges[edge_i]
1003
+ edgeB = edges[edge_j]
1004
+ # get intersection loc
1005
+ if graph.isNeighbor(edgeA, edgeB):
1006
+ shared_corner = edgeA.x[0] if edgeA.x[0] in edgeB.x else edgeA.x[1]
1007
+ intersection_loc = shared_corner.x
1008
+ else:
1009
+ intersection_loc = get_two_edge_intersection_location(
1010
+ edgeA.x[0].x, edgeA.x[1].x, edgeB.x[0].x, edgeB.x[1].x)
1011
+ if intersection_loc[0] >= 255 or intersection_loc[1] >= 255 or \
1012
+ intersection_loc[0] <= 0 or intersection_loc[1] <= 0:
1013
+ continue
1014
+
1015
+ # get another two loc
1016
+ locA = edgeA.x[1].x if \
1017
+ l2_distance(edgeA.x[0].x, intersection_loc) < l2_distance(edgeA.x[1].x, intersection_loc) else \
1018
+ edgeA.x[0].x
1019
+ locB = edgeB.x[1].x if \
1020
+ l2_distance(edgeB.x[0].x, intersection_loc) < l2_distance(edgeB.x[1].x, intersection_loc) else \
1021
+ edgeB.x[0].x
1022
+
1023
+ # get new loc
1024
+ new_loc = (locA[0] + locB[0] - intersection_loc[0], locA[1] + locB[1] - intersection_loc[1])
1025
+ if new_loc[0] >= 255 or new_loc[1] >= 255 or \
1026
+ new_loc[0] <= 0 or new_loc[1] <= 0:
1027
+ continue
1028
+
1029
+ new_corner = Element(new_loc)
1030
+ new_candidate = candidate.copy()
1031
+ new_graph = new_candidate.graph
1032
+ edgeA = new_graph.getRealElement(edgeA)
1033
+ edgeB = new_graph.getRealElement(edgeB)
1034
+ if new_candidate.addable(new_corner) is False:
1035
+ continue
1036
+ new_corner = new_candidate.addCorner_v2(new_corner)
1037
+ # get cornerA and cornerB from edgeA, edgeB
1038
+ cornerA = edgeA.x[1] if l2_distance(edgeA.x[0].x, intersection_loc) < l2_distance(edgeA.x[1].x,
1039
+ intersection_loc) \
1040
+ else edgeA.x[0]
1041
+ cornerB = edgeB.x[1] if l2_distance(edgeB.x[0].x, intersection_loc) < l2_distance(edgeB.x[1].x,
1042
+ intersection_loc) \
1043
+ else edgeB.x[0]
1044
+
1045
+ # new edges cannot be too short
1046
+ if l2_distance(cornerA.x, new_corner.x) < 12:
1047
+ continue
1048
+ if l2_distance(cornerB.x, new_corner.x) < 12:
1049
+ continue
1050
+
1051
+ flag = False
1052
+ for edge_ele in new_graph.getEdges():
1053
+ if new_corner in edge_ele.x and cornerA in edge_ele.x:
1054
+ flag = True
1055
+ break
1056
+ if edge_ele.x[0] not in (new_corner, cornerA):
1057
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerA.x, edge_ele.x[0].x)
1058
+ if l <= 7:
1059
+ flag = True
1060
+ break
1061
+ if edge_ele.x[1] not in (new_corner, cornerA):
1062
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerA.x, edge_ele.x[1].x)
1063
+ if l <= 7:
1064
+ flag = True
1065
+ break
1066
+ if flag:
1067
+ continue
1068
+ add_edgeA = new_candidate.addEdge(new_corner, cornerA)
1069
+ if new_graph.checkIntersectionEdge(add_edgeA):
1070
+ continue
1071
+
1072
+ flag = False
1073
+ for edge_ele in new_graph.getEdges():
1074
+ if new_corner in edge_ele.x and cornerB in edge_ele.x:
1075
+ flag = True
1076
+ break
1077
+ if edge_ele.x[0] not in (new_corner, cornerB):
1078
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerB.x, edge_ele.x[0].x)
1079
+ if l <= 7:
1080
+ flag = True
1081
+ break
1082
+ if edge_ele.x[1] not in (new_corner, cornerB):
1083
+ l = get_distance_of_corner_and_edge(new_corner.x, cornerB.x, edge_ele.x[1].x)
1084
+ if l <= 7:
1085
+ flag = True
1086
+ break
1087
+ if flag:
1088
+ continue
1089
+ add_edgeB = new_candidate.addEdge(new_corner, cornerB)
1090
+ if new_graph.checkIntersectionEdge(add_edgeB):
1091
+ continue
1092
+
1093
+ new_candidates.append(new_candidate)
1094
+ return new_candidates
1095
+
1096
+
1097
+ def adding_a_orthogonal_edge(candidate):
1098
+ new_candidates = []
1099
+ graph = candidate.graph
1100
+ edges = candidate.graph.getEdges()
1101
+ for edge in edges:
1102
+ cornerA = edge.x[0]
1103
+ cornerB = edge.x[1]
1104
+
1105
+ # get orthogonal direction
1106
+ dir_ = (cornerA.x[1] - cornerB.x[1], cornerB.x[0] - cornerA.x[0])
1107
+
1108
+ for the_corner in edge.x:
1109
+ temp_orth_loc = (the_corner.x[0] - dir_[0], the_corner.x[1] - dir_[1])
1110
+ for inter_edge in edges:
1111
+ if inter_edge == edge:
1112
+ continue
1113
+ if the_corner in inter_edge.x:
1114
+ continue
1115
+ intersection_loc = get_two_edge_intersection_location(
1116
+ the_corner.x, temp_orth_loc, inter_edge.x[0].x, inter_edge.x[1].x
1117
+ )
1118
+ if intersection_loc[0] >= 255 or intersection_loc[1] >= 255 or \
1119
+ intersection_loc[0] <= 0 or intersection_loc[1] <= 0:
1120
+ continue
1121
+ if np.dot((inter_edge.x[0].x[0] - intersection_loc[0], inter_edge.x[0].x[1] - intersection_loc[1]),
1122
+ (inter_edge.x[1].x[0] - intersection_loc[0], inter_edge.x[1].x[1] - intersection_loc[1])) > 0:
1123
+ # the intersection lies on the extension of inter_edge, not inside it
1124
+ continue
1125
+ if l2_distance(intersection_loc, inter_edge.x[0].x) < 5 or \
1126
+ l2_distance(intersection_loc, inter_edge.x[1].x) < 5:
1127
+ continue
1128
+
1129
+ # avoid a near-degenerate angle with neighboring edges
1130
+ flag = False
1131
+ neighbor_corners = graph.getNeighborCorner(the_corner)
1132
+ for corner_ele in neighbor_corners:
1133
+ if corner_ele in edge.x:
1134
+ continue
1135
+ if degree_of_three_corners(corner_ele.x, intersection_loc, the_corner.x) < 15:
1136
+ flag = True
1137
+ break
1138
+ if degree_of_three_corners(corner_ele.x, intersection_loc, the_corner.x) > 165:
1139
+ flag = True
1140
+ break
1141
+ if flag:
1142
+ continue
1143
+
1144
+ new_candidate = candidate.copy()
1145
+ new_graph = new_candidate.graph
1146
+ new_corner = Element(intersection_loc)
1147
+ if new_candidate.addable(new_corner) is False:
1148
+ continue
1149
+ new_corner = new_candidate.addCorner_v2(new_corner)
1150
+
1151
+ # new edges cannot be too short
1152
+ if l2_distance(new_corner.x, the_corner.x) < 7:
1153
+ continue
1154
+
1155
+ add_edge = new_candidate.addEdge(new_corner, new_graph.getRealElement(the_corner))
1156
+ if new_graph.checkIntersectionEdge(add_edge):
1157
+ continue
1158
+
1159
+ new_candidates.append(new_candidate)
1160
+ return new_candidates
1161
+
1162
+
1163
+ class _thread(threading.Thread):
1164
+ def __init__(self, threadID, name, candidate, lock, result_list, func):
1165
+ threading.Thread.__init__(self)
1166
+ self.threadID = threadID
1167
+ self.name = name
1168
+ self.candidate = candidate
1169
+ self.lock = lock
1170
+ self.result_list = result_list
1171
+ self.func = func
1172
+
1173
+ def run(self):
1174
+ print('running id: ', self.name)
1175
+ start_time = time.time()
1176
+ candidates = self.func(self.candidate)
1177
+ print('candidates generated:', self.name, len(candidates))
1178
+ self.lock.acquire()
1179
+ self.result_list.extend(candidates)
1180
+ self.lock.release()
1181
+ print(self.name, "spend time: {}s".format(time.time() - start_time))
1182
+
1183
+
1184
+ def candidate_enumerate_training(candidate, gt):
1185
+ new_candidates = []
1186
+ # remove a corner
1187
+ try:
1188
+ new_ = removing_a_corner_operation(candidate)
1189
+ if len(new_) > 0:
1190
+ new_candidates.append(random.choice(new_))
1191
+ except Exception:
1192
+ print('something went wrong while removing a corner')
1193
+
1194
+ # remove a colinear corner
1195
+ try:
1196
+ new_ = removing_a_colinear_corner_operation(candidate)
1197
+ if len(new_) > 0:
1198
+ new_candidates.append(random.choice(new_))
1199
+ except Exception:
1200
+ print('something went wrong while removing a collinear corner')
1201
+
1202
+ # remove an edge
1203
+ try:
1204
+ new_ = removing_an_edge_operation(candidate)
1205
+ if len(new_) > 0:
1206
+ new_candidates.append(random.choice(new_))
1207
+ except Exception:
1208
+ print('something went wrong while removing an edge')
1209
+
1210
+ # add an edge from an existing corner
1211
+ try:
1212
+ new_ = adding_an_edge_operation(candidate)
1213
+ if len(new_) > 0:
1214
+ new_candidates.append(random.choice(new_))
1215
+ except Exception:
1216
+ print('something went wrong while adding an edge from an existing corner')
1217
+
1218
+ # add a corner from two edges
1219
+ try:
1220
+ new_ = adding_a_corner_from_two_edges_extension(candidate)
1221
+ if len(new_) > 0:
1222
+ new_candidates.append(random.choice(new_))
1223
+ except Exception:
1224
+ print('something went wrong while adding a corner from two edges')
1225
+
1226
+ try:
1227
+ new_ = adding_a_corner_from_parallel(candidate)
1228
+ if len(new_) > 0:
1229
+ new_candidates.append(random.choice(new_))
1230
+ except Exception:
1232
+ print('something went wrong while adding a corner from parallel edges')
1232
+
1233
+ # add an edge from gt
1234
+ try:
1235
+ new_ = adding_an_edge_from_gt(candidate, gt)
1236
+ if len(new_) > 0:
1237
+ new_candidates.append(random.choice(new_))
1238
+ except Exception:
1240
+ print('something went wrong while adding an edge from gt')
1240
+
1241
+ # add an orthogonal edge
1242
+ try:
1243
+ new_ = adding_a_orthogonal_edge(candidate)
1244
+ if len(new_) > 0:
1245
+ new_candidates.append(random.choice(new_))
1246
+ except Exception:
1248
+ print('something went wrong while adding an orthogonal edge')
1248
+ return new_candidates
1249
+
1250
+
1251
+ def candidate_enumerate(candidate):
1252
+ new_candidates = []
1253
+ new_candidates.extend(removing_a_corner_operation(candidate))
1254
+ new_candidates.extend(removing_a_colinear_corner_operation(candidate))
1255
+ new_candidates.extend(removing_an_edge_operation(candidate))
1256
+ new_candidates.extend(adding_an_edge_operation(candidate))
1257
+ new_candidates.extend(adding_a_corner_from_two_edges_extension(candidate))
1258
+ new_candidates.extend(adding_a_corner_from_parallel(candidate))
1259
+ new_candidates.extend(adding_a_orthogonal_edge(candidate))
1260
+
1261
+ return new_candidates
1262
+
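The operators above enumerate the neighborhood of a candidate graph; paired with the scores stored on each element, they can drive a greedy search. The loop below is only an illustrative sketch (not code from this repository), and it assumes each candidate's element scores have already been populated via Graph.store_score:

    def greedy_search(candidate, steps=10):
        best = candidate
        for _ in range(steps):
            neighbors = reduce_duplicate_candidate(candidate_enumerate(best))
            if not neighbors:
                break
            top = max(neighbors, key=lambda c: c.graph.graph_score())
            if top.graph.graph_score() <= best.graph.graph_score():
                break  # local optimum reached
            best = top
        return best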
1263
+
1264
+ def candidate_enumerate_thread(candidate):
1265
+ new_candidates = []
1266
+ lock = threading.Lock()
1267
+
1268
+ thread1 = _thread(1, 'remove_a_corner', candidate, lock, new_candidates, removing_a_corner_operation)
1269
+ thread2 = _thread(2, 'remove_a_colinear_corner', candidate, lock, new_candidates,
1270
+ removing_a_colinear_corner_operation)
1271
+ thread3 = _thread(3, 'add_an_edge', candidate, lock, new_candidates, adding_an_edge_operation)
1272
+ thread4 = _thread(4, 'remove_an_edge', candidate, lock, new_candidates, removing_an_edge_operation)
1273
+
1274
+ thread1.start()
1275
+ thread2.start()
1276
+ thread3.start()
1277
+ thread4.start()
1278
+
1279
+ threads = []
1280
+ threads.append(thread1)
1281
+ threads.append(thread2)
1282
+ threads.append(thread3)
1283
+ threads.append(thread4)
1284
+
1285
+ for t in threads:
1286
+ t.join()
1287
+
1288
+ return new_candidates
1289
+
1290
+
1291
+ def reduce_duplicate_candidate(candidates):
1292
+ i = 0
1293
+ while i < len(candidates):
1294
+ for j in reversed(range(i + 1, len(candidates))):
1295
+ if candidates[i].equal(candidates[j]):
1296
+ del candidates[j]
1297
+ i = i + 1
1298
+ return candidates
1299
+
1300
+
1301
+ def save_candidate_image(candidate, base_path, base_name):
1302
+ corners = candidate.graph.getCornersArray()
1303
+ edges = candidate.graph.getEdgesArray()
1304
+ # graph svg
1305
+ svg = svg_generate(corners, edges, base_name, samecolor=True)
1306
+ svg.saveas(os.path.join(base_path, base_name + '.svg'))
1307
+ # corner image
1308
+ temp_mask = np.zeros((256, 256))
1309
+ for ele in candidate.graph.getCorners():
1310
+ if ele.get_score() < 0:
1311
+ temp_mask = cv2.circle(temp_mask, ele.x[::-1], 3, 1, -1)
1312
+ fig = plt.figure(frameon=False)
1313
+ fig.set_size_inches(1, 1)
1314
+ ax = plt.Axes(fig, [0., 0., 1., 1.])
1315
+ ax.set_axis_off()
1316
+ fig.add_axes(ax)
1317
+ ax.imshow(temp_mask, aspect='auto')
1318
+ fig.savefig(os.path.join(base_path, base_name + '_corner.png'), dpi=256)
1319
+ # edges image
1320
+ temp_mask = np.zeros((256, 256))
1321
+ for ele in candidate.graph.getEdges():
1322
+ if ele.get_score() < 0:
1323
+ A = ele.x[0]
1324
+ B = ele.x[1]
1325
+ temp_mask = cv2.line(temp_mask, A.x[::-1], B.x[::-1], 1, thickness=1)
1326
+ ax.imshow(temp_mask, aspect='auto')
1327
+ fig.savefig(os.path.join(base_path, base_name + '_edge.png'), dpi=256)
1328
+ # region no need fig
1329
+ plt.close()
1330
+
1331
+
1332
+ #########################################################################################
1333
+ ###################################### Class ############################################
1334
+ #########################################################################################
1335
+
1336
+ class Element:
1337
+ def __init__(self, x, safe_count=0):
1338
+ assert type(x) is tuple
1339
+ assert type(x[0]) == int or type(x[0]) == Element
1340
+ assert type(x[1]) == int or type(x[1]) == Element
1341
+ self.x = x
1342
+ self.__score = None
1343
+ self.safe_count = safe_count
1344
+
1345
+ def store_score(self, score):
1346
+ self.__score = score
1347
+
1348
+ def get_score(self):
1349
+ return self.__score
1350
+
1351
+ def equal(self, ele):
1352
+ if type(self.x[0]) != type(ele.x[0]):
1353
+ return False
1354
+ if type(self.x[0]) == int:
1355
+ # corner
1356
+ return self.x[0] == ele.x[0] and self.x[1] == ele.x[1]
1357
+ if type(self.x[0]) == Element:
1358
+ # edge
1359
+ if self.x[0].equal(ele.x[0]) and self.x[1].equal(ele.x[1]):
1360
+ return True
1361
+ if self.x[1].equal(ele.x[0]) and self.x[0].equal(ele.x[1]):
1362
+ return True
1363
+ return False
1364
+ raise BaseException('no implement type')
1365
+
1366
+
1367
+ class regionCache():
1368
+ def __init__(self, datapath):
1369
+ self.cache = {}
1370
+ self.datapath = datapath
1371
+
1372
+ def get_region(self, name):
1373
+ if name in self.cache:
1374
+ return self.cache[name]
1375
+ gt_mask = np.load(os.path.join(self.datapath, name + '.npy'))
1376
+ if len(self.cache) == 5:
1377
+ self.cache.pop(list(self.cache.keys())[0])
1378
+ self.cache[name] = gt_mask
1379
+ return gt_mask
1380
+
1381
+
1382
+ class imgCache():
1383
+ def __init__(self, datapath):
1384
+ self.cache = {}
1385
+ self.datapath = datapath
1386
+
1387
+ def get_image(self, name):
1388
+ if name in self.cache:
1389
+ return self.cache[name]
1390
+ img = skimage.img_as_float(plt.imread(os.path.join(self.datapath, 'rgb', name + '.jpg')))
1391
+ if len(self.cache) == 5:
1392
+ self.cache.pop(list(self.cache.keys())[0])
1393
+ self.cache[name] = img
1394
+ return img
1395
+
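regionCache and imgCache are hand-rolled 5-entry FIFO caches keyed by sample name. Where instance state is not needed, the same effect can be had with functools.lru_cache (a sketch under that assumption, not a drop-in replacement):

    import os
    from functools import lru_cache

    import numpy as np

    @lru_cache(maxsize=5)
    def load_region(datapath, name):
        # Cached load of a ground-truth region mask.
        return np.load(os.path.join(datapath, name + '.npy'))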
1396
+
1397
+ class Graph:
+     def __init__(self, corners, edges):
+         corners, edges = sort_graph(corners, edges)
+
+         self.__corners = []
+         for corner_i in range(corners.shape[0]):
+             self.__corners.append(
+                 Element(
+                     tuple(
+                         (int(corners[corner_i, 0]), int(corners[corner_i, 1]))
+                     )
+                 )
+             )
+         self.__edges = []
+         for edge_i in range(edges.shape[0]):
+             self.__edges.append(Element((self.__corners[edges[edge_i, 0]], self.__corners[edges[edge_i, 1]])))
+         self.__regions = []
+         self.__regions.append(Element((0, 0)))  # we use the entire region here
+
+     @classmethod
+     def initialFromTuple(cls, corners, edges):
+         edge_index = []
+         for item in edges:
+             a = corners.index(item[0])
+             b = corners.index(item[1])
+             edge_index.append((a, b))
+         edge_index = np.array(edge_index)
+         corners = np.array(corners)
+         return cls(corners, edge_index)
+
+     def store_score(self, corner_score=None, edge_score=None, region_score=None):
+         '''
+         :param corner_score: np.array of size len(corners)
+         :param edge_score: np.array of size len(edges)
+         :param region_score: np.array of size len(regions)
+         :return:
+         '''
+         if corner_score is not None:
+             for idx, element in enumerate(self.__corners):
+                 element.store_score(corner_score[idx])
+         if edge_score is not None:
+             for idx, element in enumerate(self.__edges):
+                 element.store_score(edge_score[idx])
+         if region_score is not None:
+             for idx, element in enumerate(self.__regions):
+                 element.store_score(region_score[idx])
+         return
+
+     def getCornersArray(self):
+         c = []
+         for ele in self.__corners:
+             c.append(ele.x)
+         return np.array(c)
+
+     def getEdgesArray(self):
+         c = []
+         for ele in self.__edges:
+             corner1 = ele.x[0]
+             corner2 = ele.x[1]
+             idx1 = self.__corners.index(corner1)
+             idx2 = self.__corners.index(corner2)
+             c.append([idx1, idx2])
+         return np.array(c)
+
+     def getCorners(self):
+         return self.__corners
+
+     def getRegions(self):
+         return self.__regions
+
+     def getEdges(self):
+         return self.__edges
+
+     def graph_score(self):
+         corner_score = 0
+         for ele in self.__corners:
+             corner_score += ele.get_score()
+         edge_score = 0
+         for ele in self.__edges:
+             edge_score += ele.get_score()
+         region_score = 0
+         for ele in self.__regions:
+             region_score += ele.get_score()
+         return score_weights[0] * corner_score + score_weights[1] * edge_score + score_weights[2] * region_score
+
+     def corner_score(self):
+         corner_score = 0
+         for ele in self.__corners:
+             corner_score += ele.get_score()
+         return corner_score
+
+     def edge_score(self):
+         edge_score = 0
+         for ele in self.__edges:
+             edge_score += ele.get_score()
+         return edge_score
+
+     def region_score(self):
+         region_score = 0
+         for ele in self.__regions:
+             region_score += ele.get_score()
+         return region_score
+
+     def remove(self, ele):
+         '''
+         :param ele: remove ele as well as the related elements
+         :return: set() of removed elements
+         '''
+         # corner
+         removed = set()
+         if ele in self.__corners:
+             self.__corners.remove(ele)
+             removed.add(ele)
+             # remove every edge that uses the corner
+             for idx in reversed(range(len(self.__edges))):
+                 edge_ele = self.__edges[idx]
+                 if ele in edge_ele.x:
+                     removed = removed.union(self.remove(edge_ele))
+         # edge
+         elif ele in self.__edges:
+             self.__edges.remove(ele)
+             removed.add(ele)
+             corner1 = ele.x[0]
+             corner2 = ele.x[1]
+             if corner1.safe_count == 0:
+                 # can be deleted if no remaining edge uses it
+                 _count = 0
+                 for edge_ele in self.__edges:
+                     if corner1 in edge_ele.x:
+                         _count += 1
+                 if _count == 0:
+                     removed = removed.union(self.remove(corner1))
+             if corner2.safe_count == 0:
+                 # can be deleted if no remaining edge uses it
+                 _count = 0
+                 for edge_ele in self.__edges:
+                     if corner2 in edge_ele.x:
+                         _count += 1
+                 if _count == 0:
+                     removed = removed.union(self.remove(corner2))
+         return removed
+
+     def has_edge(self, ele1, ele2):
+         """
+         :param ele1: corner1
+         :param ele2: corner2
+         :return: the edge or None
+         """
+         for edge_ele in self.__edges:
+             if ele1 in edge_ele.x and ele2 in edge_ele.x:
+                 return edge_ele
+         return None
+
+     def add_edge(self, ele1, ele2):
+         temp = self.has_edge(ele1, ele2)
+         if temp is not None:
+             temp.safe_count = SAFE_NUM
+             return temp
+         new_ele = Element((ele1, ele2), safe_count=SAFE_NUM)
+         self.__edges.append(new_ele)
+         return new_ele
+
+     def add_corner(self, ele):
+         for corner in self.__corners:
+             if corner.x == ele.x:
+                 corner.safe_count = SAFE_NUM
+                 return corner
+         ele.safe_count = SAFE_NUM
+         self.__corners.append(ele)
+         return ele
+
+     def add_corner_v2(self, ele):
+         # if the new corner is near an existing corner, return the existing corner;
+         # if the new corner lies on an edge, split that edge
+         for corner in self.__corners:
+             if l2_distance(corner.x, ele.x) < 5:
+                 corner.safe_count = SAFE_NUM
+                 return corner
+         min_d = 256
+         the_edge = None
+         for edge in self.__edges:
+             temp = get_distance_of_corner_and_edge(edge.x[0].x, edge.x[1].x, ele.x)
+             if temp < min_d:
+                 min_d = temp
+                 the_edge = edge
+         if min_d < 3:
+             # split the edge
+             corner1 = the_edge.x[0]
+             corner2 = the_edge.x[1]
+             new_ele = Element((corner1, ele), safe_count=the_edge.safe_count)
+             self.__edges.append(new_ele)
+             new_ele = Element((corner2, ele), safe_count=the_edge.safe_count)
+             self.__edges.append(new_ele)
+             self.__edges.remove(the_edge)
+         ele.safe_count = SAFE_NUM
+         self.__corners.append(ele)
+         return ele
+
+     def checkColinearCorner(self, ele):
+         if self.getCornerDegree(ele) != 2:
+             return False
+         edge_in = []
+         for edge_ele in self.__edges:
+             if ele in edge_ele.x:
+                 edge_in.append(edge_ele)
+             if len(edge_in) == 2:
+                 break
+         two_neighbor = {edge_in[0].x[0], edge_in[0].x[1], edge_in[1].x[0], edge_in[1].x[1]}
+         two_neighbor.remove(ele)
+         two_neighbor = tuple(two_neighbor)
+         if self.has_edge(two_neighbor[0], two_neighbor[1]) is not None:
+             return False
+
+         line1 = np.array(ele.x) - np.array(two_neighbor[0].x)
+         line2 = np.array(two_neighbor[1].x) - np.array(ele.x)
+         cos = np.dot(line1, line2) / (np.linalg.norm(line1) * np.linalg.norm(line2))
+         cos = min(1, max(-1, cos))
+         if np.arccos(cos) < np.pi / 9:  # 20 degrees
+             return True
+         return False
+
+     def checkIntersectionEdge(self, ele=None):
+         if ele is None:
+             for edge_i in range(len(self.__edges)):
+                 for edge_j in range(edge_i + 1, len(self.__edges)):
+                     if check_intersection(self.__edges[edge_i], self.__edges[edge_j]):
+                         return True
+             return False
+         for edge_ele in self.__edges:
+             if ele == edge_ele:
+                 continue
+             if check_intersection(edge_ele, ele):
+                 return True
+         return False
+
+     def getCornerDegree(self, ele):
+         degree = 0
+         for edge_ele in self.__edges:
+             if ele in edge_ele.x:
+                 degree += 1
+         return degree
+
+     def getEdgeConnected(self, ele):
+         out_ = set()
+         if type(ele.x[0]) == int:
+             # corner
+             for edge_ele in self.__edges:
+                 if ele in edge_ele.x:
+                     out_.add(edge_ele)
+             return out_
+         if type(ele.x[0]) == Element:
+             # edge
+             out_ = out_.union(self.getEdgeConnected(ele.x[0]))
+             out_ = out_.union(self.getEdgeConnected(ele.x[1]))
+             if ele in out_:
+                 out_.remove(ele)
+             return out_
+
+     def getNeighborCorner(self, ele):
+         out_ = set()
+         for edge_ele in self.__edges:
+             if ele == edge_ele.x[0]:
+                 out_.add(edge_ele.x[1])
+             if ele == edge_ele.x[1]:
+                 out_.add(edge_ele.x[0])
+         return out_
+
+     def getRealElement(self, ele):
+         # edge
+         if type(ele.x[0]) == Element:
+             for e in self.__edges:
+                 if (e.x[0].x == ele.x[0].x and e.x[1].x == ele.x[1].x) or \
+                         (e.x[1].x == ele.x[0].x and e.x[0].x == ele.x[1].x):
+                     return e
+             raise BaseException("no same edge exists.")
+         # corner
+         elif type(ele.x[0]) == int:
+             for c in self.__corners:
+                 if c.x == ele.x:
+                     return c
+             raise BaseException("no same corner exists.")
+
+     def copy(self):
+         corners = self.getCornersArray()
+         edges = self.getEdgesArray()
+         new_graph = Graph(corners, edges)
+         for idx, ele in enumerate(self.__corners):
+             new_graph.__corners[idx].store_score(self.__corners[idx].get_score())
+         for idx, ele in enumerate(self.__edges):
+             new_graph.__edges[idx].store_score(self.__edges[idx].get_score())
+         for idx, ele in enumerate(self.__regions):
+             new_graph.__regions[idx].store_score(self.__regions[idx].get_score())
+         return new_graph
+
+     def update_safe_count(self):
+         for ele in self.__corners:
+             if ele.safe_count > 0:
+                 ele.safe_count -= 1
+         for ele in self.__edges:
+             if ele.safe_count > 0:
+                 ele.safe_count -= 1
+
+     def isNeighbor(self, element1, element2):
+         '''
+         :param element1:
+         :param element2:
+         :return: True / False
+         '''
+         if element1 == element2:
+             return False
+         if type(element1.x[0]) != type(element2.x[0]):
+             # one corner and one edge
+             return False
+         if type(element1.x[0]) == int:
+             # both are corners
+             for edge_ele in self.__edges:
+                 if edge_ele.x[0] == element1 and edge_ele.x[1] == element2:
+                     return True
+                 if edge_ele.x[0] == element2 and edge_ele.x[1] == element1:
+                     return True
+             return False
+         if type(element1.x[0]) == Element:
+             # both are edges
+             if len({element1.x[0], element1.x[1], element2.x[0], element2.x[1]}) < 4:
+                 return True
+             return False
+
+     def equal(self, graph):
+         if len(self.__corners) != len(graph.__corners) or \
+                 len(self.__edges) != len(graph.__edges):
+             return False
+         for corner_i in range(len(self.__corners)):
+             if self.__corners[corner_i].equal(graph.__corners[corner_i]) is False:
+                 return False
+         for edge_i in range(len(self.__edges)):
+             if self.__edges[edge_i].equal(graph.__edges[edge_i]) is False:
+                 return False
+
+         return True
+
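For reference, a minimal sketch of how this Graph container is driven, before moving on to the Candidate wrapper below. The corner coordinates, edge indices, and per-element scores are made-up toy values; `sort_graph` and `score_weights` are module-level definitions from earlier in this file (metrics/new_utils.py, per the change list above), and the import path is an assumption based on that file layout:

    import numpy as np
    from metrics.new_utils import Graph  # assumed module path

    # a made-up square: four corners and four boundary edges
    corners = np.array([[10, 10], [10, 60], [60, 10], [60, 60]])
    edges = np.array([[0, 1], [0, 2], [1, 3], [2, 3]])
    g = Graph(corners, edges)
    g.store_score(corner_score=np.full(4, 0.9),   # hypothetical confidences
                  edge_score=np.full(4, 0.8),
                  region_score=np.full(1, 0.7))
    print(g.graph_score())     # weighted sum controlled by the module-level score_weights
    print(g.getEdgesArray())   # index pairs after sort_graph re-orders the corners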
+ class Candidate:
+     def __init__(self, graph, name, corner_existed_before, edge_existed_before):
+         '''
+         :param graph: a Graph instance
+         :param name: string, data name
+         :param corner_existed_before: dict {(x_i, y_i): c_i, ...} with a count for each removed corner; after one
+                                       search step each count is decremented, and entries with count == 0 are dropped
+         :param edge_existed_before: dict {((x_i1, y_i1), (x_i2, y_i2)): c_i}
+         '''
+         self.graph = graph
+         self.name = name
+         self.corner_existed_before = corner_existed_before
+         self.edge_existed_before = edge_existed_before
+
+     @classmethod
+     def initial(cls, graph, name):
+         return cls(graph, name, {}, {})
+
+     def update(self):
+         # decrement the counts of all existed-before elements
+         for key in self.corner_existed_before.keys():
+             self.corner_existed_before[key] -= 1
+         for key in self.edge_existed_before.keys():
+             self.edge_existed_before[key] -= 1
+
+         # drop the entries whose counts reached zero
+         for key in list(self.corner_existed_before.keys()):
+             if self.corner_existed_before[key] == 0:
+                 self.corner_existed_before.pop(key)
+
+         for key in list(self.edge_existed_before.keys()):
+             if self.edge_existed_before[key] == 0:
+                 self.edge_existed_before.pop(key)
+
+         # update graph
+         self.graph.update_safe_count()
+
+     def copy(self):
+         corner_existed_before = self.corner_existed_before.copy()
+         edge_existed_before = self.edge_existed_before.copy()
+         new_graph = self.graph.copy()
+         return Candidate(new_graph, self.name, corner_existed_before, edge_existed_before)
+
+     def removable(self, ele):
+         '''
+         :param ele: an Element
+         :return: True if the element's safe count has expired
+         '''
+         assert type(ele) == Element
+         return ele.safe_count == 0
+
+     def addable(self, ele):
+         if type(ele) == Element:
+             if type(ele.x[0]) == Element:
+                 # edge
+                 for edge in self.graph.getEdges():
+                     c1 = edge.x[0]
+                     c2 = edge.x[1]
+                     if (ele.x[0].x == c1.x and ele.x[1].x == c2.x) or \
+                             (ele.x[1].x == c1.x and ele.x[0].x == c2.x):
+                         # already exists
+                         return False
+                 corner1_loc = ele.x[0].x
+                 corner2_loc = ele.x[1].x
+                 if (corner1_loc, corner2_loc) in self.edge_existed_before.keys() or \
+                         (corner2_loc, corner1_loc) in self.edge_existed_before.keys():
+                     return False
+                 return True
+             else:
+                 # corner
+                 for corner in self.graph.getCorners():
+                     if l2_distance(ele.x, corner.x) < TWO_CORNER_MINIMUM_DISTANCE:
+                         # already exists
+                         return False
+                 if ele.x in self.corner_existed_before.keys():
+                     return False
+                 return True
+         else:  # (x, y) or ((x1, y1), (x2, y2))
+             if type(ele[0]) == tuple:
+                 # edge
+                 corner1_loc = ele[0]
+                 corner2_loc = ele[1]
+                 for edge in self.graph.getEdges():
+                     c1 = edge.x[0]
+                     c2 = edge.x[1]
+                     if (corner1_loc == c1.x and corner2_loc == c2.x) or \
+                             (corner2_loc == c1.x and corner1_loc == c2.x):
+                         # already exists
+                         return False
+                 if (corner1_loc, corner2_loc) in self.edge_existed_before.keys() or \
+                         (corner2_loc, corner1_loc) in self.edge_existed_before.keys():
+                     return False
+                 return True
+             else:
+                 # corner
+                 for corner in self.graph.getCorners():
+                     if l2_distance(ele, corner.x) < TWO_CORNER_MINIMUM_DISTANCE:
+                         # already exists
+                         return False
+                 if ele in self.corner_existed_before.keys():
+                     return False
+                 return True
+
+     def addCorner(self, ele):
+         if ele.x in self.corner_existed_before.keys():
+             raise BaseException('cannot add the corner')
+         new_ele = self.graph.add_corner(ele)  # possibly changed
+         return new_ele
+
+     def addCorner_v2(self, ele):
+         if ele.x in self.corner_existed_before.keys():
+             raise BaseException('cannot add the corner')
+         new_ele = self.graph.add_corner_v2(ele)
+         return new_ele
+
+     def addEdge(self, ele1, ele2):
+         corner1 = ele1
+         corner2 = ele2
+         assert corner1 in self.graph.getCorners()
+         assert corner2 in self.graph.getCorners()
+         if (corner1.x, corner2.x) in self.edge_existed_before.keys() or \
+                 (corner2.x, corner1.x) in self.edge_existed_before.keys():
+             raise BaseException('cannot add the edge')
+         new_ele = self.graph.add_edge(corner1, corner2)
+         return new_ele
+
+     def removeCorner(self, ele):
+         if ele.x in self.corner_existed_before.keys():
+             raise BaseException('already existed.')
+         self.corner_existed_before[ele.x] = SAFE_NUM
+
+     def removeEdge(self, ele):
+         corner1 = ele.x[0]
+         corner2 = ele.x[1]
+         loc1 = corner1.x
+         loc2 = corner2.x
+         if (loc1[0] > loc2[0]) or (loc1[0] == loc2[0] and loc1[1] > loc2[1]):
+             loc1 = corner2.x
+             loc2 = corner1.x
+         if (loc1, loc2) in self.edge_existed_before.keys():
+             raise BaseException('already existed.')
+         self.edge_existed_before[(loc1, loc2)] = SAFE_NUM
+
+     def generate_new_candidate_remove_a_colinear_corner(self, ele):
+         # the caller needs to check that ele is a colinear corner beforehand
+         new_candidate = self.copy()
+         new_graph = new_candidate.graph
+         ele = new_graph.getRealElement(ele)
+
+         # find the two neighboring corners
+         temp = set()
+         for element in new_graph.getEdgeConnected(ele):
+             # edge
+             if type(element.x[0]) == Element:
+                 temp.add(element.x[0])
+                 temp.add(element.x[1])
+         temp.remove(ele)
+         temp = tuple(temp)
+         assert len(temp) == 2
+
+         # add an edge between the two neighboring corners
+         # (add before remove, in case the neighbor corners would otherwise be removed for zero degree);
+         # special case: no need to check existed_before, instead drop the pair from the existed-before dict
+         added = new_graph.add_edge(temp[0], temp[1])
+         if (temp[0].x, temp[1].x) in new_candidate.edge_existed_before.keys():
+             new_candidate.edge_existed_before.pop((temp[0].x, temp[1].x))
+         if (temp[1].x, temp[0].x) in new_candidate.edge_existed_before.keys():
+             new_candidate.edge_existed_before.pop((temp[1].x, temp[0].x))
+
+         # remove
+         removed = new_graph.remove(ele)
+
+         # add the removed elements into the existed-before sets
+         for element in removed:
+             # edge
+             if type(element.x[0]) == Element:
+                 new_candidate.removeEdge(element)
+             # corner
+             elif type(element.x[0]) == int:
+                 new_candidate.removeCorner(element)
+             else:
+                 raise BaseException('wrong type.')
+
+         # reset scores that need to be recomputed
+         # all corner scores are recomputed
+         for element in new_graph.getCorners():
+             element.store_score(None)
+
+         # edges that neighbor the removed edges OR the new edge are recomputed
+         for element in new_graph.getEdges():
+             for modified_ele in removed.union({added}):
+                 if new_graph.isNeighbor(element, modified_ele):
+                     element.store_score(None)
+                     break
+
+         # all region scores are recomputed
+         for element in new_graph.getRegions():
+             element.store_score(None)
+
+         return new_candidate
+
+     def generate_new_candidate_remove_a_corner(self, ele):
+         # the caller needs to check that ele is removable before calling this method
+         new_candidate = self.copy()
+         new_graph = new_candidate.graph
+         ele = new_graph.getRealElement(ele)
+         removed = new_graph.remove(ele)
+
+         # add the removed elements into the existed-before sets
+         for element in removed:
+             # edge
+             if type(element.x[0]) == Element:
+                 corner1 = element.x[0]
+                 corner2 = element.x[1]
+                 loc1 = corner1.x
+                 loc2 = corner2.x
+                 if (loc1[0] > loc2[0]) or (loc1[0] == loc2[0] and loc1[1] > loc2[1]):
+                     loc1 = corner2.x
+                     loc2 = corner1.x
+                 if (loc1, loc2) in self.edge_existed_before.keys():
+                     raise BaseException('already existed.')
+                 new_candidate.edge_existed_before[(loc1, loc2)] = SAFE_NUM
+             # corner
+             elif type(element.x[0]) == int:
+                 if element.x in self.corner_existed_before.keys():
+                     raise BaseException('already existed.')
+                 new_candidate.corner_existed_before[element.x] = SAFE_NUM
+             else:
+                 raise BaseException('wrong type.')
+
+         # reset scores that need to be recomputed
+         # all corner scores are recomputed
+         for element in new_graph.getCorners():
+             element.store_score(None)
+
+         # edges that neighbor the removed edges are recomputed
+         for element in new_graph.getEdges():
+             for removed_ele in removed:
+                 if new_graph.isNeighbor(element, removed_ele):
+                     element.store_score(None)
+                     break
+
+         # all region scores are recomputed
+         for element in new_graph.getRegions():
+             element.store_score(None)
+
+         return new_candidate
+
+     def generate_new_candidate_add_an_edge(self, ele1, ele2):
+         # the caller needs to check addable before calling this method
+         new_candidate = self.copy()
+         new_graph = new_candidate.graph
+         ele1 = new_graph.getRealElement(ele1)
+         ele2 = new_graph.getRealElement(ele2)
+
+         # add the edge
+         new_ele = new_candidate.addEdge(ele1, ele2)
+
+         # reset scores that need to be recomputed
+         # all corner scores are recomputed
+         for element in new_graph.getCorners():
+             element.store_score(None)
+
+         # edges that neighbor the added edge are recomputed
+         for element in new_graph.getEdges():
+             if new_graph.isNeighbor(element, new_ele):
+                 element.store_score(None)
+
+         # all region scores are recomputed
+         for element in new_graph.getRegions():
+             element.store_score(None)
+
+         return new_candidate
+
+     def generate_new_candidate_remove_an_edge(self, ele):
+         # the caller needs to check that ele is removable before calling this method
+         new_candidate = self.copy()
+         new_graph = new_candidate.graph
+         ele = new_graph.getRealElement(ele)
+         removed = new_graph.remove(ele)
+
+         # add the removed elements into the existed-before sets
+         for element in removed:
+             # edge
+             if type(element.x[0]) == Element:
+                 corner1 = element.x[0]
+                 corner2 = element.x[1]
+                 loc1 = corner1.x
+                 loc2 = corner2.x
+                 if (loc1[0] > loc2[0]) or (loc1[0] == loc2[0] and loc1[1] > loc2[1]):
+                     loc1 = corner2.x
+                     loc2 = corner1.x
+                 if (loc1, loc2) in self.edge_existed_before.keys():
+                     raise BaseException('already existed.')
+                 new_candidate.edge_existed_before[(loc1, loc2)] = SAFE_NUM
+             # corner
+             elif type(element.x[0]) == int:
+                 if element.x in self.corner_existed_before.keys():
+                     raise BaseException('already existed.')
+                 new_candidate.corner_existed_before[element.x] = SAFE_NUM
+             else:
+                 raise BaseException('wrong type.')
+
+         # reset scores that need to be recomputed
+         # all corner scores are recomputed
+         for element in new_graph.getCorners():
+             element.store_score(None)
+
+         # edges that neighbor the removed edges are recomputed
+         for element in new_graph.getEdges():
+             for removed_ele in removed:
+                 if new_graph.isNeighbor(element, removed_ele):
+                     element.store_score(None)
+                     break
+
+         # all region scores are recomputed
+         for element in new_graph.getRegions():
+             element.store_score(None)
+
+         return new_candidate
+
+     def generate_new_candidate_add_a_new_triangle(self, ele_new, ele1, ele2):
+         # this method adds a new corner together with two new edges into the graph;
+         # the caller needs to check that ele_new is addable before calling this method
+         new_candidate = self.copy()
+         new_graph = new_candidate.graph
+         ele1 = new_graph.getRealElement(ele1)
+         ele2 = new_graph.getRealElement(ele2)
+
+         # add the corner
+         ele_new = new_candidate.addCorner(ele_new)  # ele_new possibly changed
+
+         # no score needs to be recomputed at this point
+
+         # add the two new edges (ele1, ele_new) and (ele2, ele_new)
+         new_candidate = new_candidate.generate_new_candidate_add_an_edge(ele_new, ele1)
+         new_candidate = new_candidate.generate_new_candidate_add_an_edge(ele_new, ele2)
+
+         return new_candidate
+
+     def generate_new_candidate_add_a_corner(self, ele):
+         # the caller needs to check that ele is addable before calling this method
+         new_candidate = self.copy()
+         new_graph = new_candidate.graph
+
+         # add the corner
+         ele = new_candidate.addCorner(ele)
+
+         # reset scores that need to be recomputed
+         # all corner scores are recomputed
+         for element in new_graph.getCorners():
+             element.store_score(None)
+
+         # no edge score needs to be recomputed
+         # all region scores are recomputed
+         for element in new_graph.getRegions():
+             element.store_score(None)
+
+         return new_candidate
+
+     def equal(self, candidate):
+         return self.graph.equal(candidate.graph)
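Continuing the Graph sketch above, one local-search move with the Candidate wrapper might look like the following (a sketch only, reusing `g` from the earlier snippet; `SAFE_NUM` is the module-level constant, and the re-scoring step is left to the evaluation networks as in the rest of this file):

    from metrics.new_utils import Candidate  # assumed module path

    cand = Candidate.initial(g, name='scene_0000')
    edge = cand.graph.getEdges()[0]
    if cand.removable(edge):                  # safe_count == 0 for freshly built edges
        new_cand = cand.generate_new_candidate_remove_an_edge(edge)
        # elements whose scores were reset to None must be re-scored
        # before comparing graph_score() of the two candidates
    cand.update()                             # decrement safe / existed-before counts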
models/__init__.py ADDED
File without changes
models/corner_models.py ADDED
@@ -0,0 +1,275 @@
+ import torch
+ from torch import nn, Tensor
+ import torch.nn.functional as F
+ import numpy as np
+ import math
+ from typing import Dict
+ from models.deformable_transformer import DeformableTransformerEncoderLayer, DeformableTransformerEncoder, \
+     DeformableTransformerDecoder, DeformableAttnDecoderLayer
+ from models.ops.modules import MSDeformAttn
+ from models.resnet import convrelu
+ from torch.nn.init import xavier_uniform_, constant_, uniform_, normal_
+ from einops.layers.torch import Rearrange
+ from utils.misc import NestedTensor
+
+
+ class HeatCorner(nn.Module):
+     """
+     The corner model of HEAT matches the edge model up to the edge-filtering part, i.e. it makes only
+     per-candidate predictions without relational modeling.
+     """
+     def __init__(self, input_dim, hidden_dim, num_feature_levels, backbone_strides, backbone_num_channels):
+         super(HeatCorner, self).__init__()
+         self.input_dim = input_dim
+         self.hidden_dim = hidden_dim
+         self.num_feature_levels = num_feature_levels
+
+         if num_feature_levels > 1:
+             num_backbone_outs = len(backbone_strides)
+             input_proj_list = []
+             for lvl in range(num_backbone_outs):
+                 in_channels = backbone_num_channels[lvl]
+                 input_proj_list.append(nn.Sequential(
+                     nn.Conv2d(in_channels, hidden_dim, kernel_size=1),
+                     nn.GroupNorm(32, hidden_dim),
+                 ))
+             for _ in range(num_feature_levels - num_backbone_outs):
+                 input_proj_list.append(nn.Sequential(
+                     nn.Conv2d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1),
+                     nn.GroupNorm(32, hidden_dim),
+                 ))
+                 in_channels = hidden_dim
+             self.input_proj = nn.ModuleList(input_proj_list)
+         else:
+             self.input_proj = nn.ModuleList([
+                 nn.Sequential(
+                     nn.Conv2d(backbone_num_channels[0], hidden_dim, kernel_size=1),
+                     nn.GroupNorm(32, hidden_dim),
+                 )])
+
+         self.patch_size = 4
+         patch_dim = (self.patch_size ** 2) * input_dim
+         self.to_patch_embedding = nn.Sequential(
+             Rearrange('b (h p1) (w p2) c -> b (h w) (p1 p2 c)', p1=self.patch_size, p2=self.patch_size),
+             nn.Linear(patch_dim, input_dim),
+             nn.Linear(input_dim, hidden_dim),
+         )
+
+         self.pixel_pe_fc = nn.Linear(input_dim, hidden_dim)
+         self.transformer = CornerTransformer(d_model=hidden_dim, nhead=8, num_encoder_layers=1,
+                                              dim_feedforward=1024, dropout=0.1)
+
+         self.img_pos = PositionEmbeddingSine(hidden_dim // 2)
+
+     @staticmethod
+     def get_ms_feat(xs, img_mask):
+         out: Dict[str, NestedTensor] = {}
+         for name, x in sorted(xs.items()):
+             m = img_mask
+             assert m is not None
+             mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0]
+             out[name] = NestedTensor(x, mask)
+         return out
+
+     @staticmethod
+     def get_decoder_reference_points(height, width, device):
+         ref_y, ref_x = torch.meshgrid(torch.linspace(0.5, height - 0.5, height, dtype=torch.float32, device=device),
+                                       torch.linspace(0.5, width - 0.5, width, dtype=torch.float32, device=device))
+         ref_y = ref_y.reshape(-1)[None] / height
+         ref_x = ref_x.reshape(-1)[None] / width
+         ref = torch.stack((ref_x, ref_y), -1)
+         return ref
+
+     def forward(self, image_feats, feat_mask, pixels_feat, pixels, all_image_feats):
+         # process image features
+         features = self.get_ms_feat(image_feats, feat_mask)
+
+         srcs = []
+         masks = []
+         all_pos = []
+
+         new_features = list()
+         for name, x in sorted(features.items()):
+             new_features.append(x)
+         features = new_features
+
+         for l, feat in enumerate(features):
+             src, mask = feat.decompose()
+             mask = mask.to(src.device)
+             srcs.append(self.input_proj[l](src))
+             pos = self.img_pos(src).to(src.dtype)
+             all_pos.append(pos)
+             masks.append(mask)
+             assert mask is not None
+
+         if self.num_feature_levels > len(srcs):
+             _len_srcs = len(srcs)
+             for l in range(_len_srcs, self.num_feature_levels):
+                 if l == _len_srcs:
+                     src = self.input_proj[l](features[-1].tensors)
+                 else:
+                     src = self.input_proj[l](srcs[-1])
+                 m = feat_mask
+                 mask = F.interpolate(m[None].float(), size=src.shape[-2:]).to(torch.bool)[0].to(src.device)
+                 pos_l = self.img_pos(src).to(src.dtype)
+                 srcs.append(src)
+                 masks.append(mask)
+                 all_pos.append(pos_l)
+
+         sp_inputs = self.to_patch_embedding(pixels_feat)
+
+         # compute the reference points
+         H_tgt = W_tgt = int(np.sqrt(sp_inputs.shape[1]))
+         reference_points_s1 = self.get_decoder_reference_points(H_tgt, W_tgt, sp_inputs.device)
+
+         corner_logits = self.transformer(srcs, masks, all_pos, sp_inputs, reference_points_s1, all_image_feats)
+         return corner_logits
+
+
+ class PositionEmbeddingSine(nn.Module):
+     """
+     This is a more standard version of the position embedding, very similar to the one
+     used by the Attention Is All You Need paper, generalized to work on images.
+     """
+
+     def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
+         super().__init__()
+         self.num_pos_feats = num_pos_feats
+         self.temperature = temperature
+         self.normalize = normalize
+         if scale is not None and normalize is False:
+             raise ValueError("normalize should be True if scale is passed")
+         if scale is None:
+             scale = 2 * math.pi
+         self.scale = scale
+
+     def forward(self, x):
+         mask = torch.zeros([x.shape[0], x.shape[2], x.shape[3]]).bool().to(x.device)
+         not_mask = ~mask
+         y_embed = not_mask.cumsum(1, dtype=torch.float32)
+         x_embed = not_mask.cumsum(2, dtype=torch.float32)
+         if self.normalize:
+             eps = 1e-6
+             y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+             x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+
+         dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+         dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
+
+         pos_x = x_embed[:, :, :, None] / dim_t
+         pos_y = y_embed[:, :, :, None] / dim_t
+         pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3)
+         pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3)
+         pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
+         return pos
+
+
+ class CornerTransformer(nn.Module):
+     def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
+                  dim_feedforward=1024, dropout=0.1,
+                  activation="relu", return_intermediate_dec=False,
+                  num_feature_levels=4, dec_n_points=4, enc_n_points=4,
+                  ):
+         super(CornerTransformer, self).__init__()
+
+         encoder_layer = DeformableTransformerEncoderLayer(d_model, dim_feedforward,
+                                                           dropout, activation,
+                                                           num_feature_levels, nhead, enc_n_points)
+         self.encoder = DeformableTransformerEncoder(encoder_layer, num_encoder_layers)
+
+         decoder_attn_layer = DeformableAttnDecoderLayer(d_model, dim_feedforward,
+                                                         dropout, activation,
+                                                         num_feature_levels, nhead, dec_n_points)
+         self.per_edge_decoder = DeformableTransformerDecoder(decoder_attn_layer, 1, False, with_sa=False)
+
+         self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model))
+
+         # upconv layers
+         self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
+         self.conv_up1 = convrelu(256 + 256, 256, 3, 1)
+         self.conv_up0 = convrelu(64 + 256, 128, 3, 1)
+         self.conv_original_size2 = convrelu(64 + 128, d_model, 3, 1)
+         self.output_fc_1 = nn.Linear(d_model, 1)
+         self.output_fc_2 = nn.Linear(d_model, 1)
+
+         self._reset_parameters()
+
+     def _reset_parameters(self):
+         for p in self.parameters():
+             if p.dim() > 1:
+                 nn.init.xavier_uniform_(p)
+         for m in self.modules():
+             if isinstance(m, MSDeformAttn):
+                 m._reset_parameters()
+         normal_(self.level_embed)
+
+     def get_valid_ratio(self, mask):
+         _, H, W = mask.shape
+         valid_H = torch.sum(~mask[:, :, 0], 1)
+         valid_W = torch.sum(~mask[:, 0, :], 1)
+         valid_ratio_h = valid_H.float() / H
+         valid_ratio_w = valid_W.float() / W
+         valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1)
+         return valid_ratio
+
+     def forward(self, srcs, masks, pos_embeds, query_embed, reference_points, all_image_feats):
+         # prepare input for the encoder
+         src_flatten = []
+         mask_flatten = []
+         lvl_pos_embed_flatten = []
+         spatial_shapes = []
+         for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)):
+             bs, c, h, w = src.shape
+             spatial_shape = (h, w)
+             spatial_shapes.append(spatial_shape)
+             src = src.flatten(2).transpose(1, 2)
+             mask = mask.flatten(1)
+             pos_embed = pos_embed.flatten(2).transpose(1, 2)
+             lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1)
+             lvl_pos_embed_flatten.append(lvl_pos_embed)
+             src_flatten.append(src)
+             mask_flatten.append(mask)
+         src_flatten = torch.cat(src_flatten, 1)
+         mask_flatten = torch.cat(mask_flatten, 1)
+         lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1)
+         spatial_shapes = torch.as_tensor(spatial_shapes, dtype=torch.long, device=src_flatten.device)
+         level_start_index = torch.cat((spatial_shapes.new_zeros((1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))
+         valid_ratios = torch.stack([self.get_valid_ratio(m) for m in masks], 1)
+
+         # encoder
+         memory = self.encoder(src_flatten, spatial_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten,
+                               mask_flatten)
+
+         # prepare input for the decoder
+         bs, _, c = memory.shape
+
+         tgt = query_embed
+
+         # per-pixel decoder (deformable cross-attention only, no self-attention)
+         hs_pixels_s1, _ = self.per_edge_decoder(tgt, reference_points, memory,
+                                                 spatial_shapes, level_start_index, valid_ratios, query_embed,
+                                                 mask_flatten)
+
+         feats_s1, preds_s1 = self.generate_corner_preds(hs_pixels_s1, all_image_feats)
+
+         return preds_s1
+
+     def generate_corner_preds(self, outputs, conv_outputs):
+         B, L, C = outputs.shape
+         side = int(np.sqrt(L))
+         outputs = outputs.view(B, side, side, C)
+         outputs = outputs.permute(0, 3, 1, 2)
+         outputs = torch.cat([outputs, conv_outputs['layer1']], dim=1)
+         x = self.conv_up1(outputs)
+
+         x = self.upsample(x)
+         x = torch.cat([x, conv_outputs['layer0']], dim=1)
+         x = self.conv_up0(x)
+
+         x = self.upsample(x)
+         x = torch.cat([x, conv_outputs['x_original']], dim=1)
+         x = self.conv_original_size2(x)
+
+         logits = x.permute(0, 2, 3, 1)
+         preds = self.output_fc_1(logits)
+         preds = preds.squeeze(-1).sigmoid()
+         return logits, preds
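As a quick shape check, the sine position embedding above can be exercised on a dummy feature map. This is only a sketch; it assumes the class as defined in this file, and uses the same hidden_dim // 2 convention as HeatCorner:

    import torch
    from models.corner_models import PositionEmbeddingSine

    pos_embed = PositionEmbeddingSine(num_pos_feats=128)   # hidden_dim // 2 for hidden_dim = 256
    feat = torch.zeros(2, 256, 64, 64)                     # dummy (B, C, H, W) backbone feature
    pos = pos_embed(feat)
    print(pos.shape)  # torch.Size([2, 256, 64, 64]): y/x sinusoids concatenated along channels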
models/corner_to_edge.py ADDED
@@ -0,0 +1,232 @@
+ import torch
+ import numpy as np
+ import scipy.ndimage.filters as filters
+ import cv2
+ import itertools
+
+ NEIGHBOUR_SIZE = 5
+ MATCH_THRESH = 5
+ LOCAL_MAX_THRESH = 0.01
+ viz_count = 0
+
+ # pre-compute all index combinations to generate edge candidates faster
+ all_combibations = dict()
+ for length in range(2, 351):
+     ids = np.arange(length)
+     combs = np.array(list(itertools.combinations(ids, 2)))
+     all_combibations[length] = combs
+
+
+ def prepare_edge_data(c_outputs, annots, images, max_corner_num):
+     bs = c_outputs.shape[0]
+     # prepare parameters for each sample of the batch
+     all_results = list()
+
+     for b_i in range(bs):
+         annot = annots[b_i]
+         output = c_outputs[b_i]
+         results = process_each_sample({'annot': annot, 'output': output, 'viz_img': images[b_i]}, max_corner_num)
+         all_results.append(results)
+
+     processed_corners = [item['corners'] for item in all_results]
+     edge_coords = [item['edges'] for item in all_results]
+     edge_labels = [item['labels'] for item in all_results]
+
+     edge_info = {
+         'edge_coords': edge_coords,
+         'edge_labels': edge_labels,
+         'processed_corners': processed_corners
+     }
+
+     edge_data = collate_edge_info(edge_info)
+     return edge_data
+
+
+ def process_annot(annot, do_round=True):
+     corners = np.array(list(annot.keys()))
+     ind = np.lexsort(corners.T)  # sort the g.t. corners to fix the order for the matching later
+     corners = corners[ind]  # sorted by y, then x
+     corner_mapping = {tuple(k): v for v, k in enumerate(corners)}
+
+     edges = list()
+     for c, connections in annot.items():
+         for other_c in connections:
+             edge_pair = (corner_mapping[c], corner_mapping[tuple(other_c)])
+             edges.append(edge_pair)
+     corner_degrees = [len(annot[tuple(c)]) for c in corners]
+     if do_round:
+         corners = corners.round()
+     return corners, edges, corner_degrees
+
+
+ def process_each_sample(data, max_corner_num):
+     annot = data['annot']
+     output = data['output']
+
+     preds = output.detach().cpu().numpy()
+
+     # non-maximum suppression: keep pixels that are the maximum of their neighborhood
+     data_max = filters.maximum_filter(preds, NEIGHBOUR_SIZE)
+     maxima = (preds == data_max)
+     data_min = filters.minimum_filter(preds, NEIGHBOUR_SIZE)
+     diff = ((data_max - data_min) > 0)
+     maxima[diff == 0] = 0
+     local_maximas = np.where((maxima > 0) & (preds > LOCAL_MAX_THRESH))
+     pred_corners = np.stack(local_maximas, axis=-1)[:, [1, 0]]  # to (x, y) format
+
+     # produce edge labels from the predicted corners here
+     processed_corners, edges, labels = get_edge_label_mix_gt(pred_corners, annot, max_corner_num)
+     # global viz_count
+     # viz_img = data['viz_img']
+     # output_path = './viz_training/{}_example_gt.png'.format(viz_count)
+     # _visualize_edge_training_data(processed_corners, edges, labels, viz_img, output_path)
+     # viz_count += 1
+
+     results = {
+         'corners': processed_corners,
+         'edges': edges,
+         'labels': labels,
+     }
+     return results
+
+
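The corner extraction above is a standard non-maximum suppression on the predicted heatmap: a pixel survives if it equals the maximum of its NEIGHBOUR_SIZE x NEIGHBOUR_SIZE neighborhood, the neighborhood is not constant, and the score exceeds LOCAL_MAX_THRESH. A self-contained sketch of the same pattern on a toy array (the peak positions and scores are made up):

    import numpy as np
    import scipy.ndimage.filters as filters

    preds = np.zeros((8, 8))
    preds[2, 3] = 0.9          # synthetic corner responses
    preds[5, 6] = 0.5
    data_max = filters.maximum_filter(preds, 5)
    maxima = (preds == data_max) & ((data_max - filters.minimum_filter(preds, 5)) > 0)
    ys, xs = np.where(maxima & (preds > 0.01))
    print(list(zip(xs, ys)))   # [(3, 2), (6, 5)] in (x, y) order, as in process_each_sample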
+ def get_edge_label_mix_gt(pred_corners, annot, max_corner_num):
+     ind = np.lexsort(pred_corners.T)  # sort the pred corners to fix the order for the matching
+     pred_corners = pred_corners[ind]  # sorted by y, then x
+     gt_corners, edge_pairs, corner_degrees = process_annot(annot)
+
+     output_to_gt = dict()
+     gt_to_output = dict()
+     diff = np.sqrt(((pred_corners[:, None] - gt_corners) ** 2).sum(-1))
+     diff = diff.T
+
+     if len(pred_corners) > 0:
+         for target_i, target in enumerate(gt_corners):
+             dist = diff[target_i]
+             if len(output_to_gt) > 0:
+                 dist[list(output_to_gt.keys())] = 1000  # ignore already matched pred corners
+             min_dist = dist.min()
+             min_idx = dist.argmin()
+             if min_dist < MATCH_THRESH and min_idx not in output_to_gt:  # a positive match
+                 output_to_gt[min_idx] = (target_i, min_dist)
+                 gt_to_output[target_i] = min_idx
+
+     all_corners = gt_corners.copy()
+
+     # replace matched g.t. corners with the corresponding pred corners
+     for gt_i in range(len(gt_corners)):
+         if gt_i in gt_to_output:
+             all_corners[gt_i] = pred_corners[gt_to_output[gt_i]]
+
+     nm_pred_ids = [i for i in range(len(pred_corners)) if i not in output_to_gt]
+     nm_pred_ids = np.random.permutation(nm_pred_ids)
+     if len(nm_pred_ids) > 0:
+         nm_pred_corners = pred_corners[nm_pred_ids]
+         if len(nm_pred_ids) + len(all_corners) <= max_corner_num:
+             all_corners = np.concatenate([all_corners, nm_pred_corners], axis=0)
+         else:
+             all_corners = np.concatenate([all_corners, nm_pred_corners[:(max_corner_num - len(gt_corners)), :]], axis=0)
+
+     processed_corners, edges, edge_ids, labels = _get_edges(all_corners, edge_pairs)
+
+     return processed_corners, edges, labels
+
+
+ def _get_edges(corners, edge_pairs):
+     ind = np.lexsort(corners.T)
+     corners = corners[ind]  # sorted by y, then x
+     corners = corners.round()
+     id_mapping = {old: new for new, old in enumerate(ind)}
+
+     all_ids = all_combibations[len(corners)]
+     edges = corners[all_ids]
+     labels = np.zeros(edges.shape[0])
+
+     N = len(corners)
+     edge_pairs = [(id_mapping[p[0]], id_mapping[p[1]]) for p in edge_pairs]
+     edge_pairs = [p for p in edge_pairs if p[0] < p[1]]
+     pos_ids = [int((2 * N - 1 - p[0]) * p[0] / 2 + p[1] - p[0] - 1) for p in edge_pairs]
+     labels[pos_ids] = 1
+
+     edge_ids = np.array(all_ids)
+     return corners, edges, edge_ids, labels
+
+
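The positive-label indexing in _get_edges relies on itertools.combinations enumerating pairs (i, j) with i < j in lexicographic order, so the pair (i, j) of N corners lands at row (2N - 1 - i) * i / 2 + (j - i - 1). A small check of that closed form (N = 4 is an arbitrary toy size):

    import itertools

    N = 4
    combs = list(itertools.combinations(range(N), 2))
    # combs == [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    for i, j in combs:
        idx = int((2 * N - 1 - i) * i / 2 + j - i - 1)
        assert combs[idx] == (i, j)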
+ def collate_edge_info(data):
+     batched_data = {}
+     lengths_info = {}
+     for field in data.keys():
+         batch_values = data[field]
+         all_lens = [len(value) for value in batch_values]
+         max_len = max(all_lens)
+         pad_value = 0
+         batch_values = [pad_sequence(value, max_len, pad_value) for value in batch_values]
+         batch_values = np.stack(batch_values, axis=0)
+
+         if field in ['edge_coords', 'edge_labels', 'gt_values']:
+             batch_values = torch.Tensor(batch_values).long()
+         if field in ['processed_corners', 'edge_coords']:
+             lengths_info[field] = all_lens
+         batched_data[field] = batch_values
+
+     # add lengths and masks into the data; the mask follows the Transformer's input format, True means padding
+     for field, lengths in lengths_info.items():
+         lengths_str = field + '_lengths'
+         batched_data[lengths_str] = torch.Tensor(lengths).long()
+         mask = torch.arange(max(lengths))
+         mask = mask.unsqueeze(0).repeat(batched_data[field].shape[0], 1)
+         mask = mask >= batched_data[lengths_str].unsqueeze(-1)
+         mask_str = field + '_mask'
+         batched_data[mask_str] = mask
+
+     return batched_data
+
+
+ def pad_sequence(seq, length, pad_value=0):
+     if len(seq) == length:
+         return seq
+     else:
+         pad_len = length - len(seq)
+         if len(seq.shape) == 1:
+             if pad_value == 0:
+                 paddings = np.zeros([pad_len, ])
+             else:
+                 paddings = np.ones([pad_len, ]) * pad_value
+         else:
+             if pad_value == 0:
+                 paddings = np.zeros([pad_len, ] + list(seq.shape[1:]))
+             else:
+                 paddings = np.ones([pad_len, ] + list(seq.shape[1:])) * pad_value
+         padded_seq = np.concatenate([seq, paddings], axis=0)
+         return padded_seq
+
+
+ def get_infer_edge_pairs(corners, confs):
+     ind = np.lexsort(corners.T)
+     corners = corners[ind]  # sorted by y, then x
+     confs = confs[ind]
+
+     edge_ids = all_combibations[len(corners)]
+     edge_coords = corners[edge_ids]
+
+     edge_coords = torch.tensor(np.array(edge_coords)).unsqueeze(0).long()
+     mask = torch.zeros([edge_coords.shape[0], edge_coords.shape[1]]).bool()
+     edge_ids = torch.tensor(np.array(edge_ids))
+     return corners, confs, edge_coords, mask, edge_ids
+
+
+ def _visualize_edge_training_data(corners, edges, edge_labels, image, save_path):
+     image = image.transpose([1, 2, 0])
+     image = (image * 255).astype(np.uint8)
+     image = np.ascontiguousarray(image)
+
+     for edge, label in zip(edges, edge_labels):
+         if label == 1:
+             cv2.line(image, tuple(edge[0].astype(int)), tuple(edge[1].astype(int)), (255, 255, 0), 2)
+
+     for c in corners:
+         cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 0, 255), -1)
+
+     cv2.imwrite(save_path, image)
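For reference, pad_sequence above right-pads along the first axis only, which is what collate_edge_info relies on when batching variable-length per-sample arrays. A minimal usage sketch, assuming the module imports as models.corner_to_edge:

    import numpy as np
    from models.corner_to_edge import pad_sequence

    seq = np.array([[1, 2], [3, 4], [5, 6]])   # e.g. three (x, y) corners of one sample
    padded = pad_sequence(seq, length=5)        # pad_value defaults to 0
    print(padded.shape)                         # (5, 2); the last two rows are zeros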
models/deformable_transformer.py ADDED
@@ -0,0 +1,236 @@
+ import copy
+ import torch
+ from torch import nn, Tensor
+ from models.ops.modules import MSDeformAttn
+ import torch.nn.functional as F
+
+
+ class DeformableTransformerEncoderLayer(nn.Module):
+     def __init__(self,
+                  d_model=256, d_ffn=1024,
+                  dropout=0.1, activation="relu",
+                  n_levels=4, n_heads=8, n_points=4):
+         super().__init__()
+
+         # self attention
+         self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
+         self.dropout1 = nn.Dropout(dropout)
+         self.norm1 = nn.LayerNorm(d_model)
+
+         # ffn
+         self.linear1 = nn.Linear(d_model, d_ffn)
+         self.activation = _get_activation_fn(activation)
+         self.dropout2 = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(d_ffn, d_model)
+         self.dropout3 = nn.Dropout(dropout)
+         self.norm2 = nn.LayerNorm(d_model)
+
+     @staticmethod
+     def with_pos_embed(tensor, pos):
+         return tensor if pos is None else tensor + pos
+
+     def forward_ffn(self, src):
+         src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
+         src = src + self.dropout3(src2)
+         src = self.norm2(src)
+         return src
+
+     def forward(self, src, pos, reference_points, spatial_shapes, level_start_index, padding_mask=None):
+         # self attention
+         src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, spatial_shapes, level_start_index,
+                               padding_mask)
+         src = src + self.dropout1(src2)
+         src = self.norm1(src)
+
+         # ffn
+         src = self.forward_ffn(src)
+
+         return src
+
+
+ class DeformableTransformerEncoder(nn.Module):
+     def __init__(self, encoder_layer, num_layers):
+         super().__init__()
+         self.layers = _get_clones(encoder_layer, num_layers)
+         self.num_layers = num_layers
+
+     @staticmethod
+     def get_reference_points(spatial_shapes, valid_ratios, device):
+         reference_points_list = []
+         for lvl, (H_, W_) in enumerate(spatial_shapes):
+             ref_y, ref_x = torch.meshgrid(torch.linspace(0.5, H_ - 0.5, H_, dtype=torch.float32, device=device),
+                                           torch.linspace(0.5, W_ - 0.5, W_, dtype=torch.float32, device=device))
+             ref_y = ref_y.reshape(-1)[None] / (valid_ratios[:, None, lvl, 1] * H_)
+             ref_x = ref_x.reshape(-1)[None] / (valid_ratios[:, None, lvl, 0] * W_)
+             ref = torch.stack((ref_x, ref_y), -1)
+             reference_points_list.append(ref)
+         reference_points = torch.cat(reference_points_list, 1)
+         reference_points = reference_points[:, :, None] * valid_ratios[:, None]
+         return reference_points
+
+     def forward(self, src, spatial_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None):
+         output = src
+         reference_points = self.get_reference_points(spatial_shapes, valid_ratios, device=src.device)
+         for _, layer in enumerate(self.layers):
+             output = layer(output, pos, reference_points, spatial_shapes, level_start_index, padding_mask)
+
+         return output
+
+
+ class DeformableAttnDecoderLayer(nn.Module):
+     def __init__(self, d_model=256, d_ffn=1024,
+                  dropout=0.1, activation="relu",
+                  n_levels=4, n_heads=8, n_points=4):
+         super().__init__()
+         # cross attention
+         self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
+         self.dropout1 = nn.Dropout(dropout)
+         self.norm1 = nn.LayerNorm(d_model)
+
+         # ffn
+         self.linear1 = nn.Linear(d_model, d_ffn)
+         self.activation = _get_activation_fn(activation)
+         self.dropout3 = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(d_ffn, d_model)
+         self.dropout4 = nn.Dropout(dropout)
+         self.norm3 = nn.LayerNorm(d_model)
+
+     @staticmethod
+     def with_pos_embed(tensor, pos):
+         return tensor if pos is None else tensor + pos
+
+     def forward_ffn(self, tgt):
+         tgt2 = self.linear2(self.dropout3(self.activation(self.linear1(tgt))))
+         tgt = tgt + self.dropout4(tgt2)
+         tgt = self.norm3(tgt)
+         return tgt
+
+     def forward(self, tgt, query_pos, reference_points, src, src_spatial_shapes, level_start_index,
+                 src_padding_mask=None,
+                 key_padding_mask=None):
+         # cross attention
+         tgt2 = self.cross_attn(self.with_pos_embed(tgt, query_pos),
+                                reference_points,
+                                src, src_spatial_shapes, level_start_index, src_padding_mask)
+         tgt = tgt + self.dropout1(tgt2)
+         tgt = self.norm1(tgt)
+
+         # ffn
+         tgt = self.forward_ffn(tgt)
+
+         return tgt
+
+
+ class DeformableTransformerDecoderLayer(nn.Module):
+     def __init__(self, d_model=256, d_ffn=1024,
+                  dropout=0.1, activation="relu",
+                  n_levels=4, n_heads=8, n_points=4):
+         super().__init__()
+         # cross attention
+         self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
+         self.dropout1 = nn.Dropout(dropout)
+         self.norm1 = nn.LayerNorm(d_model)
+
+         # self attention
+         self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
+         self.dropout2 = nn.Dropout(dropout)
+         self.norm2 = nn.LayerNorm(d_model)
+
+         # ffn
+         self.linear1 = nn.Linear(d_model, d_ffn)
+         self.activation = _get_activation_fn(activation)
+         self.dropout3 = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(d_ffn, d_model)
+         self.dropout4 = nn.Dropout(dropout)
+         self.norm3 = nn.LayerNorm(d_model)
+
+     @staticmethod
+     def with_pos_embed(tensor, pos):
+         return tensor if pos is None else tensor + pos
+
+     def forward_ffn(self, tgt):
+         tgt2 = self.linear2(self.dropout3(self.activation(self.linear1(tgt))))
+         tgt = tgt + self.dropout4(tgt2)
+         tgt = self.norm3(tgt)
+         return tgt
+
+     def forward(self, tgt, query_pos, reference_points, src, src_spatial_shapes, level_start_index,
+                 src_padding_mask=None,
+                 key_padding_mask=None,
+                 get_image_feat=True):
+         # self attention
+         q = k = self.with_pos_embed(tgt, query_pos)
+         tgt2 = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), tgt.transpose(0, 1),
+                               key_padding_mask=key_padding_mask)[0].transpose(0, 1)
+         tgt = tgt + self.dropout2(tgt2)
+         tgt = self.norm2(tgt)
+
+         if get_image_feat:
+             # cross attention
+             tgt2 = self.cross_attn(self.with_pos_embed(tgt, query_pos),
+                                    reference_points,
+                                    src, src_spatial_shapes, level_start_index, src_padding_mask)
+             tgt = tgt + self.dropout1(tgt2)
+             tgt = self.norm1(tgt)
+
+         # ffn
+         tgt = self.forward_ffn(tgt)
+
+         return tgt
+
+
+ class DeformableTransformerDecoder(nn.Module):
+     def __init__(self, decoder_layer, num_layers, return_intermediate=False, with_sa=True):
+         super().__init__()
+         self.layers = _get_clones(decoder_layer, num_layers)
+         self.num_layers = num_layers
+         self.return_intermediate = return_intermediate
+         # `with_sa` toggles the self-attention layers (hack implementation, following Deformable DETR)
+         self.with_sa = with_sa
+
+     def forward(self, tgt, reference_points, src, src_spatial_shapes, src_level_start_index, src_valid_ratios,
+                 query_pos=None, src_padding_mask=None, key_padding_mask=None, get_image_feat=True):
+         output = tgt
+
+         intermediate = []
+         intermediate_reference_points = []
+         for lid, layer in enumerate(self.layers):
+             if reference_points.shape[-1] == 4:
+                 reference_points_input = reference_points[:, :, None] \
+                                          * torch.cat([src_valid_ratios, src_valid_ratios], -1)[:, None]
+             else:
+                 assert reference_points.shape[-1] == 2
+                 reference_points_input = reference_points[:, :, None] * src_valid_ratios[:, None]
+             if self.with_sa:
+                 output = layer(output, query_pos, reference_points_input, src, src_spatial_shapes,
+                                src_level_start_index,
+                                src_padding_mask, key_padding_mask, get_image_feat)
+             else:
+                 output = layer(output, query_pos, reference_points_input, src, src_spatial_shapes,
+                                src_level_start_index,
+                                src_padding_mask, key_padding_mask)
+
+             if self.return_intermediate:
+                 intermediate.append(output)
+                 intermediate_reference_points.append(reference_points)
+
+         if self.return_intermediate:
+             return torch.stack(intermediate), torch.stack(intermediate_reference_points)
+
+         return output, reference_points
+
+
+ def _get_clones(module, N):
+     return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
+
+
+ def _get_activation_fn(activation):
+     """Return an activation function given a string"""
+     if activation == "relu":
+         return F.relu
+     if activation == "gelu":
+         return F.gelu
+     if activation == "glu":
+         return F.glu
+     raise RuntimeError(f"activation should be relu/gelu/glu, not {activation}.")
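For intuition, the transformers in corner_models.py and edge_models.py flatten all feature levels into one token sequence and index it with level_start_index, exactly as computed in their forward passes. A worked example with an illustrative four-level pyramid:

    import torch

    spatial_shapes = torch.as_tensor([(64, 64), (32, 32), (16, 16), (8, 8)], dtype=torch.long)
    level_start_index = torch.cat((spatial_shapes.new_zeros((1,)),
                                   spatial_shapes.prod(1).cumsum(0)[:-1]))
    print(level_start_index)  # tensor([   0, 4096, 5120, 5376])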
models/edge_models.py ADDED
@@ -0,0 +1,314 @@
+ # coding=utf-8
+ import torch
+ import torch.nn as nn
+ import numpy as np
+ from typing import Dict
+ from models.mlp import MLP
+ from models.deformable_transformer import DeformableTransformerEncoderLayer, DeformableTransformerEncoder, \
+     DeformableTransformerDecoder, DeformableTransformerDecoderLayer, DeformableAttnDecoderLayer
+ from models.ops.modules import MSDeformAttn
+ from models.corner_models import PositionEmbeddingSine
+ from torch.nn.init import xavier_uniform_, constant_, uniform_, normal_
+ import torch.nn.functional as F
+ from utils.misc import NestedTensor
+
+
+ class HeatEdge(nn.Module):
+     def __init__(self, input_dim, hidden_dim, num_feature_levels, backbone_strides, backbone_num_channels):
+         super(HeatEdge, self).__init__()
+         self.input_dim = input_dim
+         self.hidden_dim = hidden_dim
+         self.num_feature_levels = num_feature_levels
+
+         if num_feature_levels > 1:
+             num_backbone_outs = len(backbone_strides)
+             input_proj_list = []
+             for lvl in range(num_backbone_outs):
+                 in_channels = backbone_num_channels[lvl]
+                 input_proj_list.append(nn.Sequential(
+                     nn.Conv2d(in_channels, hidden_dim, kernel_size=1),
+                     nn.GroupNorm(32, hidden_dim),
+                 ))
+             for _ in range(num_feature_levels - num_backbone_outs):
+                 input_proj_list.append(nn.Sequential(
+                     nn.Conv2d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1),
+                     nn.GroupNorm(32, hidden_dim),
+                 ))
+                 in_channels = hidden_dim
+             self.input_proj = nn.ModuleList(input_proj_list)
+         else:
+             self.input_proj = nn.ModuleList([
+                 nn.Sequential(
+                     nn.Conv2d(backbone_num_channels[0], hidden_dim, kernel_size=1),
+                     nn.GroupNorm(32, hidden_dim),
+                 )])
+
+         self.img_pos = PositionEmbeddingSine(hidden_dim // 2)
+
+         self.edge_input_fc = nn.Linear(input_dim * 2, hidden_dim)
+         self.output_fc = MLP(input_dim=hidden_dim, hidden_dim=hidden_dim // 2, output_dim=2, num_layers=2)
+
+         self.transformer = EdgeTransformer(d_model=hidden_dim, nhead=8, num_encoder_layers=1,
+                                            num_decoder_layers=6, dim_feedforward=1024, dropout=0.1)
+
+     @staticmethod
+     def get_ms_feat(xs, img_mask):
+         out: Dict[str, NestedTensor] = {}
+         for name, x in sorted(xs.items()):
+             m = img_mask
+             assert m is not None
+             mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0]
+             out[name] = NestedTensor(x, mask)
+         return out
+
+     def forward(self, image_feats, feat_mask, corner_outputs, edge_coords, edge_masks, gt_values, corner_nums,
+                 max_candidates, do_inference=False):
+         # prepare ConvNet features
+         features = self.get_ms_feat(image_feats, feat_mask)
+
+         srcs = []
+         masks = []
+         all_pos = []
+
+         new_features = list()
+         for name, x in sorted(features.items()):
+             new_features.append(x)
+         features = new_features
+
+         for l, feat in enumerate(features):
+             src, mask = feat.decompose()
+             mask = mask.to(src.device)
+             srcs.append(self.input_proj[l](src))
+             pos = self.img_pos(src).to(src.dtype)
+             all_pos.append(pos)
+             masks.append(mask)
+             assert mask is not None
+
+         if self.num_feature_levels > len(srcs):
+             _len_srcs = len(srcs)
+             for l in range(_len_srcs, self.num_feature_levels):
+                 if l == _len_srcs:
+                     src = self.input_proj[l](features[-1].tensors)
+                 else:
+                     src = self.input_proj[l](srcs[-1])
+                 m = feat_mask
+                 mask = F.interpolate(m[None].float(), size=src.shape[-2:]).to(torch.bool)[0].to(src.device)
+                 pos_l = self.img_pos(src).to(src.dtype)
+                 srcs.append(src)
+                 masks.append(mask)
+                 all_pos.append(pos_l)
+
+         bs = edge_masks.size(0)
+         num_edges = edge_masks.size(1)
+
+         # gather the two endpoint features of every candidate edge
+         corner_feats = corner_outputs
+         edge_feats = list()
+         for b_i in range(bs):
+             feats = corner_feats[b_i, edge_coords[b_i, :, :, 1], edge_coords[b_i, :, :, 0], :]
+             edge_feats.append(feats)
+         edge_feats = torch.stack(edge_feats, dim=0)
+         edge_feats = edge_feats.view(bs, num_edges, -1)
+
+         edge_inputs = self.edge_input_fc(edge_feats.view(bs * num_edges, -1))
+         edge_inputs = edge_inputs.view(bs, num_edges, -1)
+
+         # normalized edge midpoints serve as the deformable-attention reference points
+         edge_center = (edge_coords[:, :, 0, :].float() + edge_coords[:, :, 1, :].float()) / 2
+         edge_center = edge_center / feat_mask.shape[1]
+
+         logits_per_edge, logits_hb, logits_rel, selection_ids, s2_attn_mask, s2_gt_values = self.transformer(
+             srcs, masks, all_pos, edge_inputs, edge_center, gt_values, edge_masks, corner_nums,
+             max_candidates, do_inference)
+
+         return logits_per_edge, logits_hb, logits_rel, selection_ids, s2_attn_mask, s2_gt_values
+
+
+ class EdgeTransformer(nn.Module):
132
+ def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
133
+ num_decoder_layers=6, dim_feedforward=1024, dropout=0.1,
134
+ activation="relu", return_intermediate_dec=False,
135
+ num_feature_levels=4, dec_n_points=4, enc_n_points=4,
136
+ ):
137
+ super(EdgeTransformer, self).__init__()
138
+
139
+ encoder_layer = DeformableTransformerEncoderLayer(d_model, dim_feedforward,
140
+ dropout, activation,
141
+ num_feature_levels, nhead, enc_n_points)
142
+ self.encoder = DeformableTransformerEncoder(encoder_layer, num_encoder_layers)
143
+
144
+ decoder_attn_layer = DeformableAttnDecoderLayer(d_model, dim_feedforward,
145
+ dropout, activation,
146
+ num_feature_levels, nhead, dec_n_points)
147
+ # one-layer decoder, without self-attention layers
148
+ self.per_edge_decoder = DeformableTransformerDecoder(decoder_attn_layer, 1, False, with_sa=False)
149
+
150
+ decoder_layer = DeformableTransformerDecoderLayer(d_model, dim_feedforward,
151
+ dropout, activation,
152
+ num_feature_levels, nhead, dec_n_points)
153
+
154
+ # edge decoder w/ self-attention layers (image-aware decoder and geom-only decoder)
155
+ self.relational_decoder = DeformableTransformerDecoder(decoder_layer, num_decoder_layers,
156
+ return_intermediate_dec, with_sa=True)
157
+
158
+ self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model))
159
+
160
+ self.gt_label_embed = nn.Embedding(3, d_model)
161
+
162
+ self.input_fc_hb = MLP(input_dim=2 * d_model, hidden_dim=d_model, output_dim=d_model, num_layers=2)
163
+ self.input_fc_rel = MLP(input_dim=2 * d_model, hidden_dim=d_model, output_dim=d_model, num_layers=2)
164
+
165
+ self.output_fc_1 = MLP(input_dim=d_model, hidden_dim=d_model // 2, output_dim=2, num_layers=2)
166
+ self.output_fc_2 = MLP(input_dim=d_model, hidden_dim=d_model // 2, output_dim=2, num_layers=2)
167
+ self.output_fc_3 = MLP(input_dim=d_model, hidden_dim=d_model // 2, output_dim=2, num_layers=2)
168
+ self._reset_parameters()
169
+
170
+ def _reset_parameters(self):
171
+ for p in self.parameters():
172
+ if p.dim() > 1:
173
+ nn.init.xavier_uniform_(p)
174
+ for m in self.modules():
175
+ if isinstance(m, MSDeformAttn):
176
+ m._reset_parameters()
177
+ normal_(self.level_embed)
178
+
179
+ def get_valid_ratio(self, mask):
180
+ _, H, W = mask.shape
181
+ valid_H = torch.sum(~mask[:, :, 0], 1)
182
+ valid_W = torch.sum(~mask[:, 0, :], 1)
183
+ valid_ratio_h = valid_H.float() / H
184
+ valid_ratio_w = valid_W.float() / W
185
+ valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1)
186
+ return valid_ratio
187
+
188
+ def forward(self, srcs, masks, pos_embeds, query_embed, reference_points, labels, key_padding_mask, corner_nums,
189
+ max_candidates, do_inference=False):
190
+ # prepare input for encoder
191
+ src_flatten = []
192
+ mask_flatten = []
193
+ lvl_pos_embed_flatten = []
194
+ spatial_shapes = []
195
+ for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)):
196
+ bs, c, h, w = src.shape
197
+ spatial_shape = (h, w)
198
+ spatial_shapes.append(spatial_shape)
199
+ src = src.flatten(2).transpose(1, 2)
200
+ mask = mask.flatten(1)
201
+ pos_embed = pos_embed.flatten(2).transpose(1, 2)
202
+ lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1)
203
+ lvl_pos_embed_flatten.append(lvl_pos_embed)
204
+ src_flatten.append(src)
205
+ mask_flatten.append(mask)
206
+ src_flatten = torch.cat(src_flatten, 1)
207
+ mask_flatten = torch.cat(mask_flatten, 1)
208
+ lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1)
209
+ spatial_shapes = torch.as_tensor(spatial_shapes, dtype=torch.long, device=src_flatten.device)
210
+ level_start_index = torch.cat((spatial_shapes.new_zeros((1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))
211
+ valid_ratios = torch.stack([self.get_valid_ratio(m) for m in masks], 1)
212
+
213
+ # encoder
214
+ memory = self.encoder(src_flatten, spatial_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten,
215
+ mask_flatten)
216
+
217
+ # prepare input for decoder
218
+ bs, _, c = memory.shape
219
+
220
+ tgt = query_embed
221
+
222
+ # per-edge filtering with single-layer decoder (no self-attn)
223
+ hs_per_edge, _ = self.per_edge_decoder(tgt, reference_points, memory,
224
+ spatial_shapes, level_start_index, valid_ratios, query_embed,
225
+ mask_flatten)
226
+ logits_per_edge = self.output_fc_1(hs_per_edge).permute(0, 2, 1)
227
+ filtered_hs, filtered_mask, filtered_query, filtered_rp, filtered_labels, selected_ids = self.candidate_filtering(
228
+ logits_per_edge,
229
+ hs_per_edge, query_embed, reference_points,
230
+ labels,
231
+ key_padding_mask, corner_nums, max_candidates)
232
+
233
+ # generate the info for masked training
234
+ if not do_inference:
235
+ filtered_gt_values = self.generate_gt_masking(filtered_labels, filtered_mask)
236
+ else:
237
+ filtered_gt_values = filtered_labels
238
+ gt_info = self.gt_label_embed(filtered_gt_values)
239
+
240
+ # relational decoder with image feature (image-aware decoder)
241
+ hybrid_prim_hs = self.input_fc_hb(torch.cat([filtered_hs, gt_info], dim=-1))
242
+
243
+ hs, inter_references = self.relational_decoder(hybrid_prim_hs, filtered_rp, memory,
244
+ spatial_shapes, level_start_index, valid_ratios, filtered_query,
245
+ mask_flatten,
246
+ key_padding_mask=filtered_mask, get_image_feat=True)
247
+
248
+ logits_final_hb = self.output_fc_2(hs).permute(0, 2, 1)
249
+
250
+ # relational decoder without image feature (geom-only decoder)
251
+ rel_prim_hs = self.input_fc_rel(torch.cat([filtered_query, gt_info], dim=-1))
252
+
253
+ hs_rel, _ = self.relational_decoder(rel_prim_hs, filtered_rp, memory,
254
+ spatial_shapes, level_start_index, valid_ratios, filtered_query,
255
+ mask_flatten,
256
+ key_padding_mask=filtered_mask, get_image_feat=False)
257
+
258
+ logits_final_rel = self.output_fc_3(hs_rel).permute(0, 2, 1)
259
+
260
+ return logits_per_edge, logits_final_hb, logits_final_rel, selected_ids, filtered_mask, filtered_gt_values
261
+
262
+ @staticmethod
263
+ def candidate_filtering(logits, hs, query, rp, labels, key_padding_mask, corner_nums, max_candidates):
264
+ """
265
+ Filter out the easy-negatives from the edge candidates, and update the edge information correspondingly
266
+ """
267
+ B, L, _ = hs.shape
268
+ preds = logits.detach().softmax(1)[:, 1, :] # BxL
269
+ preds[key_padding_mask == True] = -1 # ignore the masking parts
270
+ sorted_ids = torch.argsort(preds, dim=-1, descending=True)
271
+ filtered_hs = list()
272
+ filtered_mask = list()
273
+ filtered_query = list()
274
+ filtered_rp = list()
275
+ filtered_labels = list()
276
+ selected_ids = list()
277
+ for b_i in range(B):
278
+ num_candidates = corner_nums[b_i] * 3
279
+ ids = sorted_ids[b_i, :max_candidates[b_i]]
280
+ filtered_hs.append(hs[b_i][ids])
281
+ new_mask = key_padding_mask[b_i][ids]
282
+ new_mask[num_candidates:] = True
283
+ filtered_mask.append(new_mask)
284
+ filtered_query.append(query[b_i][ids])
285
+ filtered_rp.append(rp[b_i][ids])
286
+ filtered_labels.append(labels[b_i][ids])
287
+ selected_ids.append(ids)
288
+ filtered_hs = torch.stack(filtered_hs, dim=0)
289
+ filtered_mask = torch.stack(filtered_mask, dim=0)
290
+ filtered_query = torch.stack(filtered_query, dim=0)
291
+ filtered_rp = torch.stack(filtered_rp, dim=0)
292
+ filtered_labels = torch.stack(filtered_labels, dim=0)
293
+ selected_ids = torch.stack(selected_ids, dim=0)
294
+
295
+ return filtered_hs, filtered_mask, filtered_query, filtered_rp, filtered_labels, selected_ids
296
+
297
+ @staticmethod
298
+ def generate_gt_masking(labels, mask):
299
+ """
300
+ Generate the info for masked training on-the-fly with ratio=0.5
301
+ """
302
+ bs = labels.shape[0]
303
+ gt_values = torch.zeros_like(mask).long()
304
+ for b_i in range(bs):
305
+ edge_length = (mask[b_i] == 0).sum()
306
+ rand_ratio = np.random.rand() * 0.5 + 0.5
307
+ gt_rand = torch.rand(edge_length)
308
+ gt_flag = torch.zeros(edge_length)
309
+ gt_flag[torch.where(gt_rand >= rand_ratio)] = 1
310
+ gt_idx = torch.where(gt_flag == 1)
311
+ pred_idx = torch.where(gt_flag == 0)
312
+ gt_values[b_i, gt_idx[0]] = labels[b_i, gt_idx[0]]
313
+ gt_values[b_i, pred_idx[0]] = 2 # use 2 to represent unknown value, need to predict
314
+ return gt_values
models/loss.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+ from utils.geometry_utils import edge_acc
5
+
6
+
7
+ class CornerCriterion(nn.Module):
8
+ def __init__(self, image_size):
9
+ super().__init__()
10
+ self.loss_rate = 9
11
+
12
+ def forward(self, outputs_s1, targets, gauss_targets, epoch=0):
13
+ # Compute the acc first, use the acc to guide the setup of loss weight
14
+ preds_s1 = (outputs_s1 >= 0.5).float()
15
+ pos_target_ids = torch.where(targets == 1)
16
+ correct = (preds_s1[pos_target_ids] == targets[pos_target_ids]).float().sum()
17
+ recall_s1 = correct / len(pos_target_ids[0])
18
+
19
+ rate = self.loss_rate
20
+
21
+ loss_weight = (gauss_targets > 0.5).float() * rate + 1
22
+ loss_s1 = F.binary_cross_entropy(outputs_s1, gauss_targets, weight=loss_weight, reduction='none')
23
+ loss_s1 = loss_s1.sum(-1).sum(-1).mean()
24
+
25
+ return loss_s1, recall_s1
26
+
27
+
28
+ class EdgeCriterion(nn.Module):
29
+ def __init__(self):
30
+ super().__init__()
31
+ self.edge_loss = nn.CrossEntropyLoss(weight=torch.tensor([0.33, 1.0]).cuda(), reduction='none')
32
+
33
+ def forward(self, logits_s1, logits_s2_hybrid, logits_s2_rel, s2_ids, s2_edge_mask, edge_labels, edge_lengths,
34
+ edge_mask, s2_gt_values):
35
+ # loss for edge filtering
36
+ s1_losses = self.edge_loss(logits_s1, edge_labels)
37
+ s1_losses[torch.where(edge_mask == True)] = 0
38
+ s1_losses = s1_losses[torch.where(s1_losses > 0)].sum() / edge_mask.shape[0]
39
+ gt_values = torch.ones_like(edge_mask).long() * 2
40
+ s1_acc = edge_acc(logits_s1, edge_labels, edge_lengths, gt_values)
41
+
42
+ # loss for stage-2
43
+ s2_labels = torch.gather(edge_labels, 1, s2_ids)
44
+
45
+ # the image-aware decoder
46
+ s2_losses_hybrid = self.edge_loss(logits_s2_hybrid, s2_labels)
47
+ s2_losses_hybrid[torch.where((s2_edge_mask == True) | (s2_gt_values != 2))] = 0
48
+ # aggregate the loss into the final scalar
49
+ s2_losses_hybrid = s2_losses_hybrid[torch.where(s2_losses_hybrid > 0)].sum() / s2_edge_mask.shape[0]
50
+ s2_edge_lengths = (s2_edge_mask == 0).sum(dim=-1)
51
+ # compute edge-level acc
52
+ s2_acc_hybrid = edge_acc(logits_s2_hybrid, s2_labels, s2_edge_lengths, s2_gt_values)
53
+
54
+ # the geom-only decoder
55
+ s2_losses_rel = self.edge_loss(logits_s2_rel, s2_labels)
56
+ s2_losses_rel[torch.where((s2_edge_mask == True) | (s2_gt_values != 2))] = 0
57
+ # aggregate the loss into the final scalar
58
+ s2_losses_rel = s2_losses_rel[torch.where(s2_losses_rel > 0)].sum() / s2_edge_mask.shape[0]
59
+ s2_edge_lengths = (s2_edge_mask == 0).sum(dim=-1)
60
+ # compute edge-level f1-score
61
+ s2_acc_rel = edge_acc(logits_s2_rel, s2_labels, s2_edge_lengths, s2_gt_values)
62
+
63
+ return s1_losses, s1_acc, s2_losses_hybrid, s2_acc_hybrid, s2_losses_rel, s2_acc_rel
models/mlp.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ import torch.nn.functional as F
3
+
4
+
5
+ class MLP(nn.Module):
6
+ """ Very simple multi-layer perceptron (also called FFN)"""
7
+
8
+ def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
9
+ super(MLP, self).__init__()
10
+ self.output_dim = output_dim
11
+ self.num_layers = num_layers
12
+ h = [hidden_dim] * (num_layers - 1)
13
+ self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
14
+
15
+ def forward(self, x):
16
+ B, N, D = x.size()
17
+ x = x.reshape(B*N, D)
18
+ for i, layer in enumerate(self.layers):
19
+ x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
20
+ x = x.view(B, N, self.output_dim)
21
+ return x
models/ops/functions/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------------------------------
2
+ # Deformable DETR
3
+ # Copyright (c) 2020 SenseTime. All Rights Reserved.
4
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5
+ # ------------------------------------------------------------------------------------------------
6
+ # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7
+ # ------------------------------------------------------------------------------------------------
8
+
9
+ from .ms_deform_attn_func import MSDeformAttnFunction
10
+
models/ops/functions/ms_deform_attn_func.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------------------------------
2
+ # Deformable DETR
3
+ # Copyright (c) 2020 SenseTime. All Rights Reserved.
4
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5
+ # ------------------------------------------------------------------------------------------------
6
+ # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7
+ # ------------------------------------------------------------------------------------------------
8
+
9
+ from __future__ import absolute_import
10
+ from __future__ import print_function
11
+ from __future__ import division
12
+
13
+ import torch
14
+ import torch.nn.functional as F
15
+ from torch.autograd import Function
16
+ from torch.autograd.function import once_differentiable
17
+
18
+ import MultiScaleDeformableAttention as MSDA
19
+
20
+
21
+ class MSDeformAttnFunction(Function):
22
+ @staticmethod
23
+ def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
24
+ ctx.im2col_step = im2col_step
25
+ output = MSDA.ms_deform_attn_forward(
26
+ value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
27
+ ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
28
+ return output
29
+
30
+ @staticmethod
31
+ @once_differentiable
32
+ def backward(ctx, grad_output):
33
+ value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
34
+ grad_value, grad_sampling_loc, grad_attn_weight = \
35
+ MSDA.ms_deform_attn_backward(
36
+ value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
37
+
38
+ return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
39
+
40
+
41
+ def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights):
42
+ # for debug and test only,
43
+ # need to use cuda version instead
44
+ N_, S_, M_, D_ = value.shape
45
+ _, Lq_, M_, L_, P_, _ = sampling_locations.shape
46
+ value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
47
+ sampling_grids = 2 * sampling_locations - 1
48
+ sampling_value_list = []
49
+ for lid_, (H_, W_) in enumerate(value_spatial_shapes):
50
+ # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
51
+ value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
52
+ # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
53
+ sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
54
+ # N_*M_, D_, Lq_, P_
55
+ sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
56
+ mode='bilinear', padding_mode='zeros', align_corners=False)
57
+ sampling_value_list.append(sampling_value_l_)
58
+ # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_)
59
+ attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
60
+ output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
61
+ return output.transpose(1, 2).contiguous()
models/ops/make.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # ------------------------------------------------------------------------------------------------
3
+ # Deformable DETR
4
+ # Copyright (c) 2020 SenseTime. All Rights Reserved.
5
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6
+ # ------------------------------------------------------------------------------------------------
7
+ # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8
+ # ------------------------------------------------------------------------------------------------
9
+
10
+ python3 setup.py build install --user
models/ops/modules/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------------------------------
2
+ # Deformable DETR
3
+ # Copyright (c) 2020 SenseTime. All Rights Reserved.
4
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5
+ # ------------------------------------------------------------------------------------------------
6
+ # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7
+ # ------------------------------------------------------------------------------------------------
8
+
9
+ from .ms_deform_attn import MSDeformAttn
models/ops/modules/ms_deform_attn.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------------------------------
2
+ # Deformable DETR
3
+ # Copyright (c) 2020 SenseTime. All Rights Reserved.
4
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5
+ # ------------------------------------------------------------------------------------------------
6
+ # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7
+ # ------------------------------------------------------------------------------------------------
8
+
9
+ from __future__ import absolute_import
10
+ from __future__ import print_function
11
+ from __future__ import division
12
+
13
+ import warnings
14
+ import math
15
+
16
+ import torch
17
+ from torch import nn
18
+ import torch.nn.functional as F
19
+ from torch.nn.init import xavier_uniform_, constant_
20
+
21
+ from ..functions import MSDeformAttnFunction
22
+
23
+
24
+ def _is_power_of_2(n):
25
+ if (not isinstance(n, int)) or (n < 0):
26
+ raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n)))
27
+ return (n & (n-1) == 0) and n != 0
28
+
29
+
30
+ class MSDeformAttn(nn.Module):
31
+ def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
32
+ """
33
+ Multi-Scale Deformable Attention Module
34
+ :param d_model hidden dimension
35
+ :param n_levels number of feature levels
36
+ :param n_heads number of attention heads
37
+ :param n_points number of sampling points per attention head per feature level
38
+ """
39
+ super().__init__()
40
+ if d_model % n_heads != 0:
41
+ raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads))
42
+ _d_per_head = d_model // n_heads
43
+ # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
44
+ if not _is_power_of_2(_d_per_head):
45
+ warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 "
46
+ "which is more efficient in our CUDA implementation.")
47
+
48
+ self.im2col_step = 64
49
+
50
+ self.d_model = d_model
51
+ self.n_levels = n_levels
52
+ self.n_heads = n_heads
53
+ self.n_points = n_points
54
+
55
+ self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2)
56
+ self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points)
57
+ self.value_proj = nn.Linear(d_model, d_model)
58
+ self.output_proj = nn.Linear(d_model, d_model)
59
+
60
+ self._reset_parameters()
61
+
62
+ def _reset_parameters(self):
63
+ constant_(self.sampling_offsets.weight.data, 0.)
64
+ thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
65
+ grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
66
+ grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1)
67
+ for i in range(self.n_points):
68
+ grid_init[:, :, i, :] *= i + 1
69
+ with torch.no_grad():
70
+ self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1))
71
+ constant_(self.attention_weights.weight.data, 0.)
72
+ constant_(self.attention_weights.bias.data, 0.)
73
+ xavier_uniform_(self.value_proj.weight.data)
74
+ constant_(self.value_proj.bias.data, 0.)
75
+ xavier_uniform_(self.output_proj.weight.data)
76
+ constant_(self.output_proj.bias.data, 0.)
77
+
78
+ def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
79
+ """
80
+ :param query (N, Length_{query}, C)
81
+ :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area
82
+ or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes
83
+ :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C)
84
+ :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})]
85
+ :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}]
86
+ :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements
87
+
88
+ :return output (N, Length_{query}, C)
89
+ """
90
+ N, Len_q, _ = query.shape
91
+ N, Len_in, _ = input_flatten.shape
92
+ assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in
93
+
94
+ value = self.value_proj(input_flatten)
95
+ if input_padding_mask is not None:
96
+ value = value.masked_fill(input_padding_mask[..., None], float(0))
97
+ value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
98
+ sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2)
99
+ attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
100
+ attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
101
+ # N, Len_q, n_heads, n_levels, n_points, 2
102
+ if reference_points.shape[-1] == 2:
103
+ offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1)
104
+ sampling_locations = reference_points[:, :, None, :, None, :] \
105
+ + sampling_offsets / offset_normalizer[None, None, None, :, None, :]
106
+ elif reference_points.shape[-1] == 4:
107
+ sampling_locations = reference_points[:, :, None, :, None, :2] \
108
+ + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5
109
+ else:
110
+ raise ValueError(
111
+ 'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1]))
112
+ output = MSDeformAttnFunction.apply(
113
+ value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step)
114
+ output = self.output_proj(output)
115
+ return output
models/ops/setup.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------------------------------
2
+ # Deformable DETR
3
+ # Copyright (c) 2020 SenseTime. All Rights Reserved.
4
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
5
+ # ------------------------------------------------------------------------------------------------
6
+ # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
7
+ # ------------------------------------------------------------------------------------------------
8
+
9
+ import os
10
+ import glob
11
+
12
+ import torch
13
+
14
+ from torch.utils.cpp_extension import CUDA_HOME
15
+ from torch.utils.cpp_extension import CppExtension
16
+ from torch.utils.cpp_extension import CUDAExtension
17
+
18
+ from setuptools import find_packages
19
+ from setuptools import setup
20
+
21
+ requirements = ["torch", "torchvision"]
22
+
23
+ def get_extensions():
24
+ this_dir = os.path.dirname(os.path.abspath(__file__))
25
+ extensions_dir = os.path.join(this_dir, "src")
26
+
27
+ main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
28
+ source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
29
+ source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
30
+
31
+ sources = main_file + source_cpu
32
+ extension = CppExtension
33
+ extra_compile_args = {"cxx": []}
34
+ define_macros = []
35
+
36
+ if torch.cuda.is_available() and CUDA_HOME is not None:
37
+ extension = CUDAExtension
38
+ sources += source_cuda
39
+ define_macros += [("WITH_CUDA", None)]
40
+ extra_compile_args["nvcc"] = [
41
+ "-DCUDA_HAS_FP16=1",
42
+ "-D__CUDA_NO_HALF_OPERATORS__",
43
+ "-D__CUDA_NO_HALF_CONVERSIONS__",
44
+ "-D__CUDA_NO_HALF2_OPERATORS__",
45
+ ]
46
+ else:
47
+ raise NotImplementedError('Cuda is not availabel')
48
+
49
+ sources = [os.path.join(extensions_dir, s) for s in sources]
50
+ include_dirs = [extensions_dir]
51
+ ext_modules = [
52
+ extension(
53
+ "MultiScaleDeformableAttention",
54
+ sources,
55
+ include_dirs=include_dirs,
56
+ define_macros=define_macros,
57
+ extra_compile_args=extra_compile_args,
58
+ )
59
+ ]
60
+ return ext_modules
61
+
62
+ setup(
63
+ name="MultiScaleDeformableAttention",
64
+ version="1.0",
65
+ author="Weijie Su",
66
+ url="https://github.com/fundamentalvision/Deformable-DETR",
67
+ description="PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention",
68
+ packages=find_packages(exclude=("configs", "tests",)),
69
+ ext_modules=get_extensions(),
70
+ cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
71
+ )
models/ops/src/cpu/ms_deform_attn_cpu.cpp ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ **************************************************************************************************
3
+ * Deformable DETR
4
+ * Copyright (c) 2020 SenseTime. All Rights Reserved.
5
+ * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6
+ **************************************************************************************************
7
+ * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8
+ **************************************************************************************************
9
+ */
10
+
11
+ #include <vector>
12
+
13
+ #include <ATen/ATen.h>
14
+ #include <ATen/cuda/CUDAContext.h>
15
+
16
+
17
+ at::Tensor
18
+ ms_deform_attn_cpu_forward(
19
+ const at::Tensor &value,
20
+ const at::Tensor &spatial_shapes,
21
+ const at::Tensor &level_start_index,
22
+ const at::Tensor &sampling_loc,
23
+ const at::Tensor &attn_weight,
24
+ const int im2col_step)
25
+ {
26
+ AT_ERROR("Not implement on cpu");
27
+ }
28
+
29
+ std::vector<at::Tensor>
30
+ ms_deform_attn_cpu_backward(
31
+ const at::Tensor &value,
32
+ const at::Tensor &spatial_shapes,
33
+ const at::Tensor &level_start_index,
34
+ const at::Tensor &sampling_loc,
35
+ const at::Tensor &attn_weight,
36
+ const at::Tensor &grad_output,
37
+ const int im2col_step)
38
+ {
39
+ AT_ERROR("Not implement on cpu");
40
+ }
41
+
models/ops/src/cpu/ms_deform_attn_cpu.h ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ **************************************************************************************************
3
+ * Deformable DETR
4
+ * Copyright (c) 2020 SenseTime. All Rights Reserved.
5
+ * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6
+ **************************************************************************************************
7
+ * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8
+ **************************************************************************************************
9
+ */
10
+
11
+ #pragma once
12
+ #include <torch/extension.h>
13
+
14
+ at::Tensor
15
+ ms_deform_attn_cpu_forward(
16
+ const at::Tensor &value,
17
+ const at::Tensor &spatial_shapes,
18
+ const at::Tensor &level_start_index,
19
+ const at::Tensor &sampling_loc,
20
+ const at::Tensor &attn_weight,
21
+ const int im2col_step);
22
+
23
+ std::vector<at::Tensor>
24
+ ms_deform_attn_cpu_backward(
25
+ const at::Tensor &value,
26
+ const at::Tensor &spatial_shapes,
27
+ const at::Tensor &level_start_index,
28
+ const at::Tensor &sampling_loc,
29
+ const at::Tensor &attn_weight,
30
+ const at::Tensor &grad_output,
31
+ const int im2col_step);
32
+
33
+
models/ops/src/cuda/ms_deform_attn_cuda.cu ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ **************************************************************************************************
3
+ * Deformable DETR
4
+ * Copyright (c) 2020 SenseTime. All Rights Reserved.
5
+ * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6
+ **************************************************************************************************
7
+ * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8
+ **************************************************************************************************
9
+ */
10
+
11
+ #include <vector>
12
+ #include "cuda/ms_deform_im2col_cuda.cuh"
13
+
14
+ #include <ATen/ATen.h>
15
+ #include <ATen/cuda/CUDAContext.h>
16
+ #include <cuda.h>
17
+ #include <cuda_runtime.h>
18
+
19
+
20
+ at::Tensor ms_deform_attn_cuda_forward(
21
+ const at::Tensor &value,
22
+ const at::Tensor &spatial_shapes,
23
+ const at::Tensor &level_start_index,
24
+ const at::Tensor &sampling_loc,
25
+ const at::Tensor &attn_weight,
26
+ const int im2col_step)
27
+ {
28
+ AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
29
+ AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
30
+ AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
31
+ AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
32
+ AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
33
+
34
+ AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
35
+ AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
36
+ AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
37
+ AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
38
+ AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
39
+
40
+ const int batch = value.size(0);
41
+ const int spatial_size = value.size(1);
42
+ const int num_heads = value.size(2);
43
+ const int channels = value.size(3);
44
+
45
+ const int num_levels = spatial_shapes.size(0);
46
+
47
+ const int num_query = sampling_loc.size(1);
48
+ const int num_point = sampling_loc.size(4);
49
+
50
+ const int im2col_step_ = std::min(batch, im2col_step);
51
+
52
+ AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
53
+
54
+ auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
55
+
56
+ const int batch_n = im2col_step_;
57
+ auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
58
+ auto per_value_size = spatial_size * num_heads * channels;
59
+ auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
60
+ auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
61
+ for (int n = 0; n < batch/im2col_step_; ++n)
62
+ {
63
+ auto columns = output_n.select(0, n);
64
+ AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_forward_cuda", ([&] {
65
+ ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),
66
+ value.data<scalar_t>() + n * im2col_step_ * per_value_size,
67
+ spatial_shapes.data<int64_t>(),
68
+ level_start_index.data<int64_t>(),
69
+ sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
70
+ attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
71
+ batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
72
+ columns.data<scalar_t>());
73
+
74
+ }));
75
+ }
76
+
77
+ output = output.view({batch, num_query, num_heads*channels});
78
+
79
+ return output;
80
+ }
81
+
82
+
83
+ std::vector<at::Tensor> ms_deform_attn_cuda_backward(
84
+ const at::Tensor &value,
85
+ const at::Tensor &spatial_shapes,
86
+ const at::Tensor &level_start_index,
87
+ const at::Tensor &sampling_loc,
88
+ const at::Tensor &attn_weight,
89
+ const at::Tensor &grad_output,
90
+ const int im2col_step)
91
+ {
92
+
93
+ AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
94
+ AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
95
+ AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
96
+ AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
97
+ AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
98
+ AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous");
99
+
100
+ AT_ASSERTM(value.type().is_cuda(), "value must be a CUDA tensor");
101
+ AT_ASSERTM(spatial_shapes.type().is_cuda(), "spatial_shapes must be a CUDA tensor");
102
+ AT_ASSERTM(level_start_index.type().is_cuda(), "level_start_index must be a CUDA tensor");
103
+ AT_ASSERTM(sampling_loc.type().is_cuda(), "sampling_loc must be a CUDA tensor");
104
+ AT_ASSERTM(attn_weight.type().is_cuda(), "attn_weight must be a CUDA tensor");
105
+ AT_ASSERTM(grad_output.type().is_cuda(), "grad_output must be a CUDA tensor");
106
+
107
+ const int batch = value.size(0);
108
+ const int spatial_size = value.size(1);
109
+ const int num_heads = value.size(2);
110
+ const int channels = value.size(3);
111
+
112
+ const int num_levels = spatial_shapes.size(0);
113
+
114
+ const int num_query = sampling_loc.size(1);
115
+ const int num_point = sampling_loc.size(4);
116
+
117
+ const int im2col_step_ = std::min(batch, im2col_step);
118
+
119
+ AT_ASSERTM(batch % im2col_step_ == 0, "batch(%d) must divide im2col_step(%d)", batch, im2col_step_);
120
+
121
+ auto grad_value = at::zeros_like(value);
122
+ auto grad_sampling_loc = at::zeros_like(sampling_loc);
123
+ auto grad_attn_weight = at::zeros_like(attn_weight);
124
+
125
+ const int batch_n = im2col_step_;
126
+ auto per_value_size = spatial_size * num_heads * channels;
127
+ auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
128
+ auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
129
+ auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
130
+
131
+ for (int n = 0; n < batch/im2col_step_; ++n)
132
+ {
133
+ auto grad_output_g = grad_output_n.select(0, n);
134
+ AT_DISPATCH_FLOATING_TYPES(value.type(), "ms_deform_attn_backward_cuda", ([&] {
135
+ ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(),
136
+ grad_output_g.data<scalar_t>(),
137
+ value.data<scalar_t>() + n * im2col_step_ * per_value_size,
138
+ spatial_shapes.data<int64_t>(),
139
+ level_start_index.data<int64_t>(),
140
+ sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
141
+ attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
142
+ batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
143
+ grad_value.data<scalar_t>() + n * im2col_step_ * per_value_size,
144
+ grad_sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
145
+ grad_attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size);
146
+
147
+ }));
148
+ }
149
+
150
+ return {
151
+ grad_value, grad_sampling_loc, grad_attn_weight
152
+ };
153
+ }
models/ops/src/cuda/ms_deform_attn_cuda.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ **************************************************************************************************
3
+ * Deformable DETR
4
+ * Copyright (c) 2020 SenseTime. All Rights Reserved.
5
+ * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6
+ **************************************************************************************************
7
+ * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
8
+ **************************************************************************************************
9
+ */
10
+
11
+ #pragma once
12
+ #include <torch/extension.h>
13
+
14
+ at::Tensor ms_deform_attn_cuda_forward(
15
+ const at::Tensor &value,
16
+ const at::Tensor &spatial_shapes,
17
+ const at::Tensor &level_start_index,
18
+ const at::Tensor &sampling_loc,
19
+ const at::Tensor &attn_weight,
20
+ const int im2col_step);
21
+
22
+ std::vector<at::Tensor> ms_deform_attn_cuda_backward(
23
+ const at::Tensor &value,
24
+ const at::Tensor &spatial_shapes,
25
+ const at::Tensor &level_start_index,
26
+ const at::Tensor &sampling_loc,
27
+ const at::Tensor &attn_weight,
28
+ const at::Tensor &grad_output,
29
+ const int im2col_step);
30
+
models/ops/src/cuda/ms_deform_im2col_cuda.cuh ADDED
@@ -0,0 +1,1327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*!
2
+ **************************************************************************
3
+ * Deformable DETR
4
+ * Copyright (c) 2020 SenseTime. All Rights Reserved.
5
+ * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6
+ **************************************************************************
7
+ * Modified from DCN (https://github.com/msracver/Deformable-ConvNets)
8
+ * Copyright (c) 2018 Microsoft
9
+ **************************************************************************
10
+ */
11
+
12
+ #include <cstdio>
13
+ #include <algorithm>
14
+ #include <cstring>
15
+
16
+ #include <ATen/ATen.h>
17
+ #include <ATen/cuda/CUDAContext.h>
18
+
19
+ #include <THC/THCAtomics.cuh>
20
+
21
+ #define CUDA_KERNEL_LOOP(i, n) \
22
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
23
+ i < (n); \
24
+ i += blockDim.x * gridDim.x)
25
+
26
+ const int CUDA_NUM_THREADS = 1024;
27
+ inline int GET_BLOCKS(const int N, const int num_threads)
28
+ {
29
+ return (N + num_threads - 1) / num_threads;
30
+ }
31
+
32
+
33
+ template <typename scalar_t>
34
+ __device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t* &bottom_data,
35
+ const int &height, const int &width, const int &nheads, const int &channels,
36
+ const scalar_t &h, const scalar_t &w, const int &m, const int &c)
37
+ {
38
+ const int h_low = floor(h);
39
+ const int w_low = floor(w);
40
+ const int h_high = h_low + 1;
41
+ const int w_high = w_low + 1;
42
+
43
+ const scalar_t lh = h - h_low;
44
+ const scalar_t lw = w - w_low;
45
+ const scalar_t hh = 1 - lh, hw = 1 - lw;
46
+
47
+ const int w_stride = nheads * channels;
48
+ const int h_stride = width * w_stride;
49
+ const int h_low_ptr_offset = h_low * h_stride;
50
+ const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
51
+ const int w_low_ptr_offset = w_low * w_stride;
52
+ const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
53
+ const int base_ptr = m * channels + c;
54
+
55
+ scalar_t v1 = 0;
56
+ if (h_low >= 0 && w_low >= 0)
57
+ {
58
+ const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
59
+ v1 = bottom_data[ptr1];
60
+ }
61
+ scalar_t v2 = 0;
62
+ if (h_low >= 0 && w_high <= width - 1)
63
+ {
64
+ const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
65
+ v2 = bottom_data[ptr2];
66
+ }
67
+ scalar_t v3 = 0;
68
+ if (h_high <= height - 1 && w_low >= 0)
69
+ {
70
+ const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
71
+ v3 = bottom_data[ptr3];
72
+ }
73
+ scalar_t v4 = 0;
74
+ if (h_high <= height - 1 && w_high <= width - 1)
75
+ {
76
+ const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
77
+ v4 = bottom_data[ptr4];
78
+ }
79
+
80
+ const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
81
+
82
+ const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
83
+ return val;
84
+ }
85
+
86
+
87
+ template <typename scalar_t>
88
+ __device__ void ms_deform_attn_col2im_bilinear(const scalar_t* &bottom_data,
89
+ const int &height, const int &width, const int &nheads, const int &channels,
90
+ const scalar_t &h, const scalar_t &w, const int &m, const int &c,
91
+ const scalar_t &top_grad,
92
+ const scalar_t &attn_weight,
93
+ scalar_t* &grad_value,
94
+ scalar_t* grad_sampling_loc,
95
+ scalar_t* grad_attn_weight)
96
+ {
97
+ const int h_low = floor(h);
98
+ const int w_low = floor(w);
99
+ const int h_high = h_low + 1;
100
+ const int w_high = w_low + 1;
101
+
102
+ const scalar_t lh = h - h_low;
103
+ const scalar_t lw = w - w_low;
104
+ const scalar_t hh = 1 - lh, hw = 1 - lw;
105
+
106
+ const int w_stride = nheads * channels;
107
+ const int h_stride = width * w_stride;
108
+ const int h_low_ptr_offset = h_low * h_stride;
109
+ const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
110
+ const int w_low_ptr_offset = w_low * w_stride;
111
+ const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
112
+ const int base_ptr = m * channels + c;
113
+
114
+ const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
115
+ const scalar_t top_grad_value = top_grad * attn_weight;
116
+ scalar_t grad_h_weight = 0, grad_w_weight = 0;
117
+
118
+ scalar_t v1 = 0;
119
+ if (h_low >= 0 && w_low >= 0)
120
+ {
121
+ const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
122
+ v1 = bottom_data[ptr1];
123
+ grad_h_weight -= hw * v1;
124
+ grad_w_weight -= hh * v1;
125
+ atomicAdd(grad_value+ptr1, w1*top_grad_value);
126
+ }
127
+ scalar_t v2 = 0;
128
+ if (h_low >= 0 && w_high <= width - 1)
129
+ {
130
+ const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
131
+ v2 = bottom_data[ptr2];
132
+ grad_h_weight -= lw * v2;
133
+ grad_w_weight += hh * v2;
134
+ atomicAdd(grad_value+ptr2, w2*top_grad_value);
135
+ }
136
+ scalar_t v3 = 0;
137
+ if (h_high <= height - 1 && w_low >= 0)
138
+ {
139
+ const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
140
+ v3 = bottom_data[ptr3];
141
+ grad_h_weight += hw * v3;
142
+ grad_w_weight -= lh * v3;
143
+ atomicAdd(grad_value+ptr3, w3*top_grad_value);
144
+ }
145
+ scalar_t v4 = 0;
146
+ if (h_high <= height - 1 && w_high <= width - 1)
147
+ {
148
+ const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
149
+ v4 = bottom_data[ptr4];
150
+ grad_h_weight += lw * v4;
151
+ grad_w_weight += lh * v4;
152
+ atomicAdd(grad_value+ptr4, w4*top_grad_value);
153
+ }
154
+
155
+ const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
156
+ *grad_attn_weight = top_grad * val;
157
+ *grad_sampling_loc = width * grad_w_weight * top_grad_value;
158
+ *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value;
159
+ }
160
+
161
+
162
+ template <typename scalar_t>
163
+ __device__ void ms_deform_attn_col2im_bilinear_gm(const scalar_t* &bottom_data,
164
+ const int &height, const int &width, const int &nheads, const int &channels,
165
+ const scalar_t &h, const scalar_t &w, const int &m, const int &c,
166
+ const scalar_t &top_grad,
167
+ const scalar_t &attn_weight,
168
+ scalar_t* &grad_value,
169
+ scalar_t* grad_sampling_loc,
170
+ scalar_t* grad_attn_weight)
171
+ {
172
+ const int h_low = floor(h);
173
+ const int w_low = floor(w);
174
+ const int h_high = h_low + 1;
175
+ const int w_high = w_low + 1;
176
+
177
+ const scalar_t lh = h - h_low;
178
+ const scalar_t lw = w - w_low;
179
+ const scalar_t hh = 1 - lh, hw = 1 - lw;
180
+
181
+ const int w_stride = nheads * channels;
182
+ const int h_stride = width * w_stride;
183
+ const int h_low_ptr_offset = h_low * h_stride;
184
+ const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
185
+ const int w_low_ptr_offset = w_low * w_stride;
186
+ const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
187
+ const int base_ptr = m * channels + c;
188
+
189
+ const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
190
+ const scalar_t top_grad_value = top_grad * attn_weight;
191
+ scalar_t grad_h_weight = 0, grad_w_weight = 0;
192
+
193
+ scalar_t v1 = 0;
194
+ if (h_low >= 0 && w_low >= 0)
195
+ {
196
+ const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
197
+ v1 = bottom_data[ptr1];
198
+ grad_h_weight -= hw * v1;
199
+ grad_w_weight -= hh * v1;
200
+ atomicAdd(grad_value+ptr1, w1*top_grad_value);
201
+ }
202
+ scalar_t v2 = 0;
203
+ if (h_low >= 0 && w_high <= width - 1)
204
+ {
205
+ const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
206
+ v2 = bottom_data[ptr2];
207
+ grad_h_weight -= lw * v2;
208
+ grad_w_weight += hh * v2;
209
+ atomicAdd(grad_value+ptr2, w2*top_grad_value);
210
+ }
211
+ scalar_t v3 = 0;
212
+ if (h_high <= height - 1 && w_low >= 0)
213
+ {
214
+ const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
215
+ v3 = bottom_data[ptr3];
216
+ grad_h_weight += hw * v3;
217
+ grad_w_weight -= lh * v3;
218
+ atomicAdd(grad_value+ptr3, w3*top_grad_value);
219
+ }
220
+ scalar_t v4 = 0;
221
+ if (h_high <= height - 1 && w_high <= width - 1)
222
+ {
223
+ const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
224
+ v4 = bottom_data[ptr4];
225
+ grad_h_weight += lw * v4;
226
+ grad_w_weight += lh * v4;
227
+ atomicAdd(grad_value+ptr4, w4*top_grad_value);
228
+ }
229
+
230
+ const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
231
+ atomicAdd(grad_attn_weight, top_grad * val);
232
+ atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value);
233
+ atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value);
234
+ }
235
+
236
+
237
+ template <typename scalar_t>
238
+ __global__ void ms_deformable_im2col_gpu_kernel(const int n,
239
+ const scalar_t *data_value,
240
+ const int64_t *data_spatial_shapes,
241
+ const int64_t *data_level_start_index,
242
+ const scalar_t *data_sampling_loc,
243
+ const scalar_t *data_attn_weight,
244
+ const int batch_size,
245
+ const int spatial_size,
246
+ const int num_heads,
247
+ const int channels,
248
+ const int num_levels,
249
+ const int num_query,
250
+ const int num_point,
251
+ scalar_t *data_col)
252
+ {
253
+ CUDA_KERNEL_LOOP(index, n)
254
+ {
255
+ int _temp = index;
256
+ const int c_col = _temp % channels;
257
+ _temp /= channels;
258
+ const int sampling_index = _temp;
259
+ const int m_col = _temp % num_heads;
260
+ _temp /= num_heads;
261
+ const int q_col = _temp % num_query;
262
+ _temp /= num_query;
263
+ const int b_col = _temp;
264
+
265
+ scalar_t *data_col_ptr = data_col + index;
266
+ int data_weight_ptr = sampling_index * num_levels * num_point;
267
+ int data_loc_w_ptr = data_weight_ptr << 1;
268
+ const int qid_stride = num_heads * channels;
269
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
270
+ scalar_t col = 0;
271
+
272
+ for (int l_col=0; l_col < num_levels; ++l_col)
273
+ {
274
+ const int level_start_id = data_level_start_index[l_col];
275
+ const int spatial_h_ptr = l_col << 1;
276
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
277
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
278
+ const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride);
279
+ for (int p_col=0; p_col < num_point; ++p_col)
280
+ {
281
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
282
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
283
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
284
+
285
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
286
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
287
+
288
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
289
+ {
290
+           col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col) * weight;
+         }
+
+         data_weight_ptr += 1;
+         data_loc_w_ptr += 2;
+       }
+     }
+     *data_col_ptr = col;
+   }
+ }
+
+ template <typename scalar_t, unsigned int blockSize>
+ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(const int n,
+         const scalar_t *grad_col,
+         const scalar_t *data_value,
+         const int64_t *data_spatial_shapes,
+         const int64_t *data_level_start_index,
+         const scalar_t *data_sampling_loc,
+         const scalar_t *data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t *grad_value,
+         scalar_t *grad_sampling_loc,
+         scalar_t *grad_attn_weight)
+ {
+   CUDA_KERNEL_LOOP(index, n)
+   {
+     __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+     __shared__ scalar_t cache_grad_attn_weight[blockSize];
+     unsigned int tid = threadIdx.x;
+     int _temp = index;
+     const int c_col = _temp % channels;
+     _temp /= channels;
+     const int sampling_index = _temp;
+     const int m_col = _temp % num_heads;
+     _temp /= num_heads;
+     const int q_col = _temp % num_query;
+     _temp /= num_query;
+     const int b_col = _temp;
+
+     const scalar_t top_grad = grad_col[index];
+
+     int data_weight_ptr = sampling_index * num_levels * num_point;
+     int data_loc_w_ptr = data_weight_ptr << 1;
+     const int grad_sampling_ptr = data_weight_ptr;
+     grad_sampling_loc += grad_sampling_ptr << 1;
+     grad_attn_weight += grad_sampling_ptr;
+     const int grad_weight_stride = 1;
+     const int grad_loc_stride = 2;
+     const int qid_stride = num_heads * channels;
+     const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+     for (int l_col=0; l_col < num_levels; ++l_col)
+     {
+       const int level_start_id = data_level_start_index[l_col];
+       const int spatial_h_ptr = l_col << 1;
+       const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+       const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+       const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+       const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+       scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+       for (int p_col=0; p_col < num_point; ++p_col)
+       {
+         const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+         const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+         const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+         const scalar_t h_im = loc_h * spatial_h - 0.5;
+         const scalar_t w_im = loc_w * spatial_w - 0.5;
+         *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+         *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+         *(cache_grad_attn_weight+threadIdx.x)=0;
+         if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+         {
+           ms_deform_attn_col2im_bilinear(
+             data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+             top_grad, weight, grad_value_ptr,
+             cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+         }
+
+         __syncthreads();
+         if (tid == 0)
+         {
+           scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];
+           int sid=2;
+           for (unsigned int tid = 1; tid < blockSize; ++tid)
+           {
+             _grad_w += cache_grad_sampling_loc[sid];
+             _grad_h += cache_grad_sampling_loc[sid + 1];
+             _grad_a += cache_grad_attn_weight[tid];
+             sid += 2;
+           }
+
+
+           *grad_sampling_loc = _grad_w;
+           *(grad_sampling_loc + 1) = _grad_h;
+           *grad_attn_weight = _grad_a;
+         }
+         __syncthreads();
+
+         data_weight_ptr += 1;
+         data_loc_w_ptr += 2;
+         grad_attn_weight += grad_weight_stride;
+         grad_sampling_loc += grad_loc_stride;
+       }
+     }
+   }
+ }
+
+
+ template <typename scalar_t, unsigned int blockSize>
+ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(const int n,
+         const scalar_t *grad_col,
+         const scalar_t *data_value,
+         const int64_t *data_spatial_shapes,
+         const int64_t *data_level_start_index,
+         const scalar_t *data_sampling_loc,
+         const scalar_t *data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t *grad_value,
+         scalar_t *grad_sampling_loc,
+         scalar_t *grad_attn_weight)
+ {
+   CUDA_KERNEL_LOOP(index, n)
+   {
+     __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+     __shared__ scalar_t cache_grad_attn_weight[blockSize];
+     unsigned int tid = threadIdx.x;
+     int _temp = index;
+     const int c_col = _temp % channels;
+     _temp /= channels;
+     const int sampling_index = _temp;
+     const int m_col = _temp % num_heads;
+     _temp /= num_heads;
+     const int q_col = _temp % num_query;
+     _temp /= num_query;
+     const int b_col = _temp;
+
+     const scalar_t top_grad = grad_col[index];
+
+     int data_weight_ptr = sampling_index * num_levels * num_point;
+     int data_loc_w_ptr = data_weight_ptr << 1;
+     const int grad_sampling_ptr = data_weight_ptr;
+     grad_sampling_loc += grad_sampling_ptr << 1;
+     grad_attn_weight += grad_sampling_ptr;
+     const int grad_weight_stride = 1;
+     const int grad_loc_stride = 2;
+     const int qid_stride = num_heads * channels;
+     const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+     for (int l_col=0; l_col < num_levels; ++l_col)
+     {
+       const int level_start_id = data_level_start_index[l_col];
+       const int spatial_h_ptr = l_col << 1;
+       const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+       const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+       const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+       const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+       scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+       for (int p_col=0; p_col < num_point; ++p_col)
+       {
+         const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+         const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+         const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+         const scalar_t h_im = loc_h * spatial_h - 0.5;
+         const scalar_t w_im = loc_w * spatial_w - 0.5;
+         *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+         *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+         *(cache_grad_attn_weight+threadIdx.x)=0;
+         if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+         {
+           ms_deform_attn_col2im_bilinear(
+             data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+             top_grad, weight, grad_value_ptr,
+             cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+         }
+
+         __syncthreads();
+
+         for (unsigned int s=blockSize/2; s>0; s>>=1)
+         {
+           if (tid < s) {
+             const unsigned int xid1 = tid << 1;
+             const unsigned int xid2 = (tid + s) << 1;
+             cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+             cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+             cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];
+           }
+           __syncthreads();
+         }
+
+         if (tid == 0)
+         {
+           *grad_sampling_loc = cache_grad_sampling_loc[0];
+           *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];
+           *grad_attn_weight = cache_grad_attn_weight[0];
+         }
+         __syncthreads();
+
+         data_weight_ptr += 1;
+         data_loc_w_ptr += 2;
+         grad_attn_weight += grad_weight_stride;
+         grad_sampling_loc += grad_loc_stride;
+       }
+     }
+   }
+ }
+
+
+ template <typename scalar_t>
+ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(const int n,
+         const scalar_t *grad_col,
+         const scalar_t *data_value,
+         const int64_t *data_spatial_shapes,
+         const int64_t *data_level_start_index,
+         const scalar_t *data_sampling_loc,
+         const scalar_t *data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t *grad_value,
+         scalar_t *grad_sampling_loc,
+         scalar_t *grad_attn_weight)
+ {
+   CUDA_KERNEL_LOOP(index, n)
+   {
+     extern __shared__ int _s[];
+     scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;
+     scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+     unsigned int tid = threadIdx.x;
+     int _temp = index;
+     const int c_col = _temp % channels;
+     _temp /= channels;
+     const int sampling_index = _temp;
+     const int m_col = _temp % num_heads;
+     _temp /= num_heads;
+     const int q_col = _temp % num_query;
+     _temp /= num_query;
+     const int b_col = _temp;
+
+     const scalar_t top_grad = grad_col[index];
+
+     int data_weight_ptr = sampling_index * num_levels * num_point;
+     int data_loc_w_ptr = data_weight_ptr << 1;
+     const int grad_sampling_ptr = data_weight_ptr;
+     grad_sampling_loc += grad_sampling_ptr << 1;
+     grad_attn_weight += grad_sampling_ptr;
+     const int grad_weight_stride = 1;
+     const int grad_loc_stride = 2;
+     const int qid_stride = num_heads * channels;
+     const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+     for (int l_col=0; l_col < num_levels; ++l_col)
+     {
+       const int level_start_id = data_level_start_index[l_col];
+       const int spatial_h_ptr = l_col << 1;
+       const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+       const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+       const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+       const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+       scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+       for (int p_col=0; p_col < num_point; ++p_col)
+       {
+         const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+         const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+         const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+         const scalar_t h_im = loc_h * spatial_h - 0.5;
+         const scalar_t w_im = loc_w * spatial_w - 0.5;
+         *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+         *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+         *(cache_grad_attn_weight+threadIdx.x)=0;
+         if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+         {
+           ms_deform_attn_col2im_bilinear(
+             data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+             top_grad, weight, grad_value_ptr,
+             cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+         }
+
+         __syncthreads();
+         if (tid == 0)
+         {
+           scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];
+           int sid=2;
+           for (unsigned int tid = 1; tid < blockDim.x; ++tid)
+           {
+             _grad_w += cache_grad_sampling_loc[sid];
+             _grad_h += cache_grad_sampling_loc[sid + 1];
+             _grad_a += cache_grad_attn_weight[tid];
+             sid += 2;
+           }
+
+
+           *grad_sampling_loc = _grad_w;
+           *(grad_sampling_loc + 1) = _grad_h;
+           *grad_attn_weight = _grad_a;
+         }
+         __syncthreads();
+
+         data_weight_ptr += 1;
+         data_loc_w_ptr += 2;
+         grad_attn_weight += grad_weight_stride;
+         grad_sampling_loc += grad_loc_stride;
+       }
+     }
+   }
+ }
+
+ template <typename scalar_t>
+ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(const int n,
+         const scalar_t *grad_col,
+         const scalar_t *data_value,
+         const int64_t *data_spatial_shapes,
+         const int64_t *data_level_start_index,
+         const scalar_t *data_sampling_loc,
+         const scalar_t *data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t *grad_value,
+         scalar_t *grad_sampling_loc,
+         scalar_t *grad_attn_weight)
+ {
+   CUDA_KERNEL_LOOP(index, n)
+   {
+     extern __shared__ int _s[];
+     scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;
+     scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+     unsigned int tid = threadIdx.x;
+     int _temp = index;
+     const int c_col = _temp % channels;
+     _temp /= channels;
+     const int sampling_index = _temp;
+     const int m_col = _temp % num_heads;
+     _temp /= num_heads;
+     const int q_col = _temp % num_query;
+     _temp /= num_query;
+     const int b_col = _temp;
+
+     const scalar_t top_grad = grad_col[index];
+
+     int data_weight_ptr = sampling_index * num_levels * num_point;
+     int data_loc_w_ptr = data_weight_ptr << 1;
+     const int grad_sampling_ptr = data_weight_ptr;
+     grad_sampling_loc += grad_sampling_ptr << 1;
+     grad_attn_weight += grad_sampling_ptr;
+     const int grad_weight_stride = 1;
+     const int grad_loc_stride = 2;
+     const int qid_stride = num_heads * channels;
+     const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+     for (int l_col=0; l_col < num_levels; ++l_col)
+     {
+       const int level_start_id = data_level_start_index[l_col];
+       const int spatial_h_ptr = l_col << 1;
+       const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+       const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+       const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+       const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+       scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+       for (int p_col=0; p_col < num_point; ++p_col)
+       {
+         const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+         const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+         const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+         const scalar_t h_im = loc_h * spatial_h - 0.5;
+         const scalar_t w_im = loc_w * spatial_w - 0.5;
+         *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+         *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+         *(cache_grad_attn_weight+threadIdx.x)=0;
+         if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+         {
+           ms_deform_attn_col2im_bilinear(
+             data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+             top_grad, weight, grad_value_ptr,
+             cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+         }
+
+         __syncthreads();
+
+         for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1)
+         {
+           if (tid < s) {
+             const unsigned int xid1 = tid << 1;
+             const unsigned int xid2 = (tid + s) << 1;
+             cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+             cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+             cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];
+             if (tid + (s << 1) < spre)
+             {
+               cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)];
+               cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)];
+               cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
+             }
+           }
+           __syncthreads();
+         }
+
+         if (tid == 0)
+         {
+           *grad_sampling_loc = cache_grad_sampling_loc[0];
+           *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];
+           *grad_attn_weight = cache_grad_attn_weight[0];
+         }
+         __syncthreads();
+
+         data_weight_ptr += 1;
+         data_loc_w_ptr += 2;
+         grad_attn_weight += grad_weight_stride;
+         grad_sampling_loc += grad_loc_stride;
+       }
+     }
+   }
+ }
+
+ template <typename scalar_t>
+ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(const int n,
+         const scalar_t *grad_col,
+         const scalar_t *data_value,
+         const int64_t *data_spatial_shapes,
+         const int64_t *data_level_start_index,
+         const scalar_t *data_sampling_loc,
+         const scalar_t *data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t *grad_value,
+         scalar_t *grad_sampling_loc,
+         scalar_t *grad_attn_weight)
+ {
+   CUDA_KERNEL_LOOP(index, n)
+   {
+     extern __shared__ int _s[];
+     scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;
+     scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+     unsigned int tid = threadIdx.x;
+     int _temp = index;
+     const int c_col = _temp % channels;
+     _temp /= channels;
+     const int sampling_index = _temp;
+     const int m_col = _temp % num_heads;
+     _temp /= num_heads;
+     const int q_col = _temp % num_query;
+     _temp /= num_query;
+     const int b_col = _temp;
+
+     const scalar_t top_grad = grad_col[index];
+
+     int data_weight_ptr = sampling_index * num_levels * num_point;
+     int data_loc_w_ptr = data_weight_ptr << 1;
+     const int grad_sampling_ptr = data_weight_ptr;
+     grad_sampling_loc += grad_sampling_ptr << 1;
+     grad_attn_weight += grad_sampling_ptr;
+     const int grad_weight_stride = 1;
+     const int grad_loc_stride = 2;
+     const int qid_stride = num_heads * channels;
+     const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+     for (int l_col=0; l_col < num_levels; ++l_col)
+     {
+       const int level_start_id = data_level_start_index[l_col];
+       const int spatial_h_ptr = l_col << 1;
+       const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+       const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+       const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+       const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+       scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+       for (int p_col=0; p_col < num_point; ++p_col)
+       {
+         const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+         const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+         const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+         const scalar_t h_im = loc_h * spatial_h - 0.5;
+         const scalar_t w_im = loc_w * spatial_w - 0.5;
+         *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;
+         *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;
+         *(cache_grad_attn_weight+threadIdx.x)=0;
+         if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+         {
+           ms_deform_attn_col2im_bilinear(
+             data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+             top_grad, weight, grad_value_ptr,
+             cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);
+         }
+
+         __syncthreads();
+
+         for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1)
+         {
+           if (tid < s) {
+             const unsigned int xid1 = tid << 1;
+             const unsigned int xid2 = (tid + s) << 1;
+             cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+             cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+             cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];
+             if (tid + (s << 1) < spre)
+             {
+               cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)];
+               cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)];
+               cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
+             }
+           }
+           __syncthreads();
+         }
+
+         if (tid == 0)
+         {
+           atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]);
+           atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]);
+           atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]);
+         }
+         __syncthreads();
+
+         data_weight_ptr += 1;
+         data_loc_w_ptr += 2;
+         grad_attn_weight += grad_weight_stride;
+         grad_sampling_loc += grad_loc_stride;
+       }
+     }
+   }
+ }
+
+
+ template <typename scalar_t>
+ __global__ void ms_deformable_col2im_gpu_kernel_gm(const int n,
+         const scalar_t *grad_col,
+         const scalar_t *data_value,
+         const int64_t *data_spatial_shapes,
+         const int64_t *data_level_start_index,
+         const scalar_t *data_sampling_loc,
+         const scalar_t *data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t *grad_value,
+         scalar_t *grad_sampling_loc,
+         scalar_t *grad_attn_weight)
+ {
+   CUDA_KERNEL_LOOP(index, n)
+   {
+     int _temp = index;
+     const int c_col = _temp % channels;
+     _temp /= channels;
+     const int sampling_index = _temp;
+     const int m_col = _temp % num_heads;
+     _temp /= num_heads;
+     const int q_col = _temp % num_query;
+     _temp /= num_query;
+     const int b_col = _temp;
+
+     const scalar_t top_grad = grad_col[index];
+
+     int data_weight_ptr = sampling_index * num_levels * num_point;
+     int data_loc_w_ptr = data_weight_ptr << 1;
+     const int grad_sampling_ptr = data_weight_ptr;
+     grad_sampling_loc += grad_sampling_ptr << 1;
+     grad_attn_weight += grad_sampling_ptr;
+     const int grad_weight_stride = 1;
+     const int grad_loc_stride = 2;
+     const int qid_stride = num_heads * channels;
+     const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+     for (int l_col=0; l_col < num_levels; ++l_col)
+     {
+       const int level_start_id = data_level_start_index[l_col];
+       const int spatial_h_ptr = l_col << 1;
+       const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+       const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+       const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;
+       const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+       scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+       for (int p_col=0; p_col < num_point; ++p_col)
+       {
+         const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+         const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+         const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+         const scalar_t h_im = loc_h * spatial_h - 0.5;
+         const scalar_t w_im = loc_w * spatial_w - 0.5;
+         if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)
+         {
+           ms_deform_attn_col2im_bilinear_gm(
+             data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,
+             top_grad, weight, grad_value_ptr,
+             grad_sampling_loc, grad_attn_weight);
+         }
+         data_weight_ptr += 1;
+         data_loc_w_ptr += 2;
+         grad_attn_weight += grad_weight_stride;
+         grad_sampling_loc += grad_loc_stride;
+       }
+     }
+   }
+ }
+
+
+ template <typename scalar_t>
+ void ms_deformable_im2col_cuda(cudaStream_t stream,
+         const scalar_t* data_value,
+         const int64_t* data_spatial_shapes,
+         const int64_t* data_level_start_index,
+         const scalar_t* data_sampling_loc,
+         const scalar_t* data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t* data_col)
+ {
+   const int num_kernels = batch_size * num_query * num_heads * channels;
+   const int num_actual_kernels = batch_size * num_query * num_heads * channels;
+   const int num_threads = CUDA_NUM_THREADS;
+   ms_deformable_im2col_gpu_kernel<scalar_t>
+       <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+          0, stream>>>(
+       num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight,
+       batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col);
+
+   cudaError_t err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+     printf("error in ms_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
+   }
+
+ }
+
+ template <typename scalar_t>
+ void ms_deformable_col2im_cuda(cudaStream_t stream,
+         const scalar_t* grad_col,
+         const scalar_t* data_value,
+         const int64_t * data_spatial_shapes,
+         const int64_t * data_level_start_index,
+         const scalar_t * data_sampling_loc,
+         const scalar_t * data_attn_weight,
+         const int batch_size,
+         const int spatial_size,
+         const int num_heads,
+         const int channels,
+         const int num_levels,
+         const int num_query,
+         const int num_point,
+         scalar_t* grad_value,
+         scalar_t* grad_sampling_loc,
+         scalar_t* grad_attn_weight)
+ {
+   const int num_threads = (channels > CUDA_NUM_THREADS) ? CUDA_NUM_THREADS : channels;
+   const int num_kernels = batch_size * num_query * num_heads * channels;
+   const int num_actual_kernels = batch_size * num_query * num_heads * channels;
+   if (channels > 1024)
+   {
+     if ((channels & 1023) == 0)
+     {
+       ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks<scalar_t>
+           <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+              num_threads*3*sizeof(scalar_t), stream>>>(
+           num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+           data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+           num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+     }
+     else
+     {
+       ms_deformable_col2im_gpu_kernel_gm<scalar_t>
+           <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+              0, stream>>>(
+           num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+           data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+           num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+     }
+   }
+   else {
+     switch(channels)
+     {
+       case 1:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 1>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 2:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 2>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 4:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 4>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 8:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 8>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 16:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 16>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 32:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 32>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 64:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 64>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 128:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 128>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 256:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 256>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 512:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 512>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       case 1024:
+         ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 1024>
+             <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                0, stream>>>(
+             num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+             data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+             num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         break;
+       default:
+         if (channels < 64)
+         {
+           ms_deformable_col2im_gpu_kernel_shm_reduce_v1<scalar_t>
+               <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                  num_threads*3*sizeof(scalar_t), stream>>>(
+               num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+               data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+               num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         }
+         else
+         {
+           ms_deformable_col2im_gpu_kernel_shm_reduce_v2<scalar_t>
+               <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,
+                  num_threads*3*sizeof(scalar_t), stream>>>(
+               num_kernels, grad_col, data_value, data_spatial_shapes, data_level_start_index,
+               data_sampling_loc, data_attn_weight, batch_size, spatial_size, num_heads, channels,
+               num_levels, num_query, num_point, grad_value, grad_sampling_loc, grad_attn_weight);
+         }
+     }
+   }
+   cudaError_t err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+     printf("error in ms_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+   }
+
+ }
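
The forward im2col kernel above computes exactly what the pure-PyTorch fallback in models/ops/functions/ms_deform_attn_func.py (imported as ms_deform_attn_core_pytorch by models/ops/test.py below) computes with F.grid_sample. A minimal sketch of that reference computation, assuming the standard Deformable DETR tensor layout of value (N, S, M, D), sampling_locations (N, Lq, M, L, P, 2) in [0, 1], and attention_weights (N, Lq, M, L, P):

    import torch
    import torch.nn.functional as F

    def ms_deform_attn_pytorch(value, value_spatial_shapes,
                               sampling_locations, attention_weights):
        N_, S_, M_, D_ = value.shape
        _, Lq_, M_, L_, P_, _ = sampling_locations.shape
        value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
        # map [0, 1] locations to the [-1, 1] range expected by grid_sample
        sampling_grids = 2 * sampling_locations - 1
        sampling_value_list = []
        for lid_, (H_, W_) in enumerate(value_spatial_shapes):
            # (N_, H_*W_, M_, D_) -> (N_*M_, D_, H_, W_)
            value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_ * M_, D_, H_, W_)
            # (N_, Lq_, M_, P_, 2) -> (N_*M_, Lq_, P_, 2)
            sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
            # bilinear sampling, mirroring ms_deform_attn_im2col_bilinear above
            sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, mode='bilinear',
                                              padding_mode='zeros', align_corners=False)
            sampling_value_list.append(sampling_value_l_)
        # weight and sum over all levels and points
        attention_weights = attention_weights.transpose(1, 2).reshape(N_ * M_, 1, Lq_, L_ * P_)
        output = (torch.stack(sampling_value_list, dim=-2).flatten(-2)
                  * attention_weights).sum(-1).view(N_, M_ * D_, Lq_)
        return output.transpose(1, 2).contiguous()
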
models/ops/src/ms_deform_attn.h ADDED
@@ -0,0 +1,62 @@
+ /*!
+ **************************************************************************************************
+ * Deformable DETR
+ * Copyright (c) 2020 SenseTime. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+ **************************************************************************************************
+ * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+ **************************************************************************************************
+ */
+
+ #pragma once
+
+ #include "cpu/ms_deform_attn_cpu.h"
+
+ #ifdef WITH_CUDA
+ #include "cuda/ms_deform_attn_cuda.h"
+ #endif
+
+
+ at::Tensor
+ ms_deform_attn_forward(
+     const at::Tensor &value,
+     const at::Tensor &spatial_shapes,
+     const at::Tensor &level_start_index,
+     const at::Tensor &sampling_loc,
+     const at::Tensor &attn_weight,
+     const int im2col_step)
+ {
+   if (value.type().is_cuda())
+   {
+ #ifdef WITH_CUDA
+     return ms_deform_attn_cuda_forward(
+       value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
+ #else
+     AT_ERROR("Not compiled with GPU support");
+ #endif
+   }
+   AT_ERROR("Not implemented on the CPU");
+ }
+
+ std::vector<at::Tensor>
+ ms_deform_attn_backward(
+     const at::Tensor &value,
+     const at::Tensor &spatial_shapes,
+     const at::Tensor &level_start_index,
+     const at::Tensor &sampling_loc,
+     const at::Tensor &attn_weight,
+     const at::Tensor &grad_output,
+     const int im2col_step)
+ {
+   if (value.type().is_cuda())
+   {
+ #ifdef WITH_CUDA
+     return ms_deform_attn_cuda_backward(
+       value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
+ #else
+     AT_ERROR("Not compiled with GPU support");
+ #endif
+   }
+   AT_ERROR("Not implemented on the CPU");
+ }
+
models/ops/src/vision.cpp ADDED
@@ -0,0 +1,16 @@
+ /*!
+ **************************************************************************************************
+ * Deformable DETR
+ * Copyright (c) 2020 SenseTime. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+ **************************************************************************************************
+ * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+ **************************************************************************************************
+ */
+
+ #include "ms_deform_attn.h"
+
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+   m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward");
+   m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward");
+ }
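
Once compiled, these two bindings are consumed from Python through a torch.autograd.Function wrapper (the MSDeformAttnFunction imported by test.py below). A hedged sketch of what that wrapper typically looks like; the extension module name MultiScaleDeformableAttention is an assumption taken from the stock Deformable DETR setup.py:

    import torch
    import MultiScaleDeformableAttention as MSDA  # assumed module name

    class MSDeformAttnFunction(torch.autograd.Function):
        @staticmethod
        def forward(ctx, value, spatial_shapes, level_start_index,
                    sampling_loc, attn_weight, im2col_step):
            ctx.im2col_step = im2col_step
            output = MSDA.ms_deform_attn_forward(
                value, spatial_shapes, level_start_index,
                sampling_loc, attn_weight, ctx.im2col_step)
            ctx.save_for_backward(value, spatial_shapes, level_start_index,
                                  sampling_loc, attn_weight)
            return output

        @staticmethod
        def backward(ctx, grad_output):
            value, spatial_shapes, level_start_index, sampling_loc, attn_weight = ctx.saved_tensors
            grad_value, grad_sampling_loc, grad_attn_weight = MSDA.ms_deform_attn_backward(
                value, spatial_shapes, level_start_index,
                sampling_loc, attn_weight, grad_output.contiguous(), ctx.im2col_step)
            # one gradient per forward input; the shape/index tensors and
            # im2col_step are not differentiable
            return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
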
models/ops/test.py ADDED
@@ -0,0 +1,89 @@
+ # ------------------------------------------------------------------------------------------------
+ # Deformable DETR
+ # Copyright (c) 2020 SenseTime. All Rights Reserved.
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+ # ------------------------------------------------------------------------------------------------
+ # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+ # ------------------------------------------------------------------------------------------------
+
+ from __future__ import absolute_import
+ from __future__ import print_function
+ from __future__ import division
+
+ import time
+ import torch
+ import torch.nn as nn
+ from torch.autograd import gradcheck
+
+ from functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch
+
+
+ N, M, D = 1, 2, 2
+ Lq, L, P = 2, 2, 2
+ shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda()
+ level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1]))
+ S = sum([(H*W).item() for H, W in shapes])
+
+
+ torch.manual_seed(3)
+
+
+ @torch.no_grad()
+ def check_forward_equal_with_pytorch_double():
+     value = torch.rand(N, S, M, D).cuda() * 0.01
+     sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
+     attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
+     attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
+     im2col_step = 2
+     output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu()
+     output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu()
+     fwdok = torch.allclose(output_cuda, output_pytorch)
+     max_abs_err = (output_cuda - output_pytorch).abs().max()
+     max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()
+
+     print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')
+
+
+ @torch.no_grad()
+ def check_forward_equal_with_pytorch_float():
+     value = torch.rand(N, S, M, D).cuda() * 0.01
+     sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
+     attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
+     attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
+     im2col_step = 2
+     output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu()
+     output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu()
+     fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3)
+     max_abs_err = (output_cuda - output_pytorch).abs().max()
+     max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()
+
+     print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')
+
+
+ def check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True):
+
+     value = torch.rand(N, S, M, channels).cuda() * 0.01
+     sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
+     attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
+     attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
+     im2col_step = 2
+     func = MSDeformAttnFunction.apply
+
+     value.requires_grad = grad_value
+     sampling_locations.requires_grad = grad_sampling_loc
+     attention_weights.requires_grad = grad_attn_weight
+
+     gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step))
+
+     print(f'* {gradok} check_gradient_numerical(D={channels})')
+
+
+ if __name__ == '__main__':
+     check_forward_equal_with_pytorch_double()
+     check_forward_equal_with_pytorch_float()
+
+     for channels in [30, 32, 64, 71, 1025, 2048, 3096]:
+         check_gradient_numerical(channels, True, True, True)
+
+
+
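
For concreteness, the two feature levels declared at the top of test.py flatten to 24 and 6 spatial locations, so the flattened value tensor has S = 30 rows and the levels begin at offsets [0, 24]; the snippet below is just that arithmetic, run on CPU:

    import torch

    shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long)
    # cumulative offsets of each flattened level, prefixed with 0
    level_start_index = torch.cat((shapes.new_zeros((1,)),
                                   shapes.prod(1).cumsum(0)[:-1]))
    assert level_start_index.tolist() == [0, 24]
    assert sum((H * W).item() for H, W in shapes) == 30
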
models/resnet.py ADDED
@@ -0,0 +1,167 @@
+ import torch
+ import torch.nn as nn
+ from torchvision import models
+
+
+ def convrelu(in_channels, out_channels, kernel, padding):
+     return nn.Sequential(
+         nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
+         nn.ReLU(inplace=True),
+     )
+
+
+ class ResNetBackbone(nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.base_model = models.resnet50(pretrained=False)
+         self.base_layers = list(self.base_model.children())
+
+         self.conv_original_size0 = convrelu(3, 64, 3, 1)
+         self.conv_original_size1 = convrelu(64, 64, 3, 1)
+         self.layer0 = nn.Sequential(*self.base_layers[:3])  # size=(N, 64, x.H/2, x.W/2)
+         self.layer1 = nn.Sequential(*self.base_layers[3:5])  # size=(N, 64, x.H/4, x.W/4)
+         self.layer2 = self.base_layers[5]  # size=(N, 128, x.H/8, x.W/8)
+         self.layer3 = self.base_layers[6]  # size=(N, 256, x.H/16, x.W/16)
+         self.layer4 = self.base_layers[7]  # size=(N, 512, x.H/32, x.W/32)
+
+         self.strides = [8, 16, 32]
+         self.num_channels = [512, 1024, 2048]
+
+     def forward(self, inputs):
+         x_original = self.conv_original_size0(inputs)
+         x_original = self.conv_original_size1(x_original)
+         layer0 = self.layer0(inputs)
+         layer1 = self.layer1(layer0)
+         layer2 = self.layer2(layer1)
+         layer3 = self.layer3(layer2)
+         layer4 = self.layer4(layer3)
+
+         xs = {"0": layer2, "1": layer3, "2": layer4}
+         all_feats = {'layer0': layer0, 'layer1': layer1, 'layer2': layer2,
+                      'layer3': layer3, 'layer4': layer4, 'x_original': x_original}
+
+         mask = torch.zeros(inputs.shape)[:, 0, :, :].to(layer4.device)
+         return xs, mask, all_feats
+
+     def train(self, mode=True):
+         # Override train so that the training mode is set as we want
+         nn.Module.train(self, mode)
+         if mode:
+             # fix all bn layers
+             def set_bn_eval(m):
+                 classname = m.__class__.__name__
+                 if classname.find('BatchNorm') != -1:
+                     m.eval()
+
+             self.apply(set_bn_eval)
+
+
+ class ResNetUNet(nn.Module):
+     def __init__(self, n_class, out_dim=None, ms_feat=False):
+         super().__init__()
+
+         self.return_ms_feat = ms_feat
+         self.out_dim = out_dim
+
+         self.base_model = models.resnet50(pretrained=True)
+         self.base_layers = list(self.base_model.children())
+
+         self.layer0 = nn.Sequential(*self.base_layers[:3])  # size=(N, 64, x.H/2, x.W/2)
+         # self.layer0_1x1 = convrelu(64, 64, 1, 0)
+         self.layer1 = nn.Sequential(*self.base_layers[3:5])  # size=(N, 64, x.H/4, x.W/4)
+         # self.layer1_1x1 = convrelu(256, 256, 1, 0)
+         self.layer2 = self.base_layers[5]  # size=(N, 128, x.H/8, x.W/8)
+         # self.layer2_1x1 = convrelu(512, 512, 1, 0)
+         self.layer3 = self.base_layers[6]  # size=(N, 256, x.H/16, x.W/16)
+         # self.layer3_1x1 = convrelu(1024, 1024, 1, 0)
+         self.layer4 = self.base_layers[7]  # size=(N, 512, x.H/32, x.W/32)
+         # self.layer4_1x1 = convrelu(2048, 2048, 1, 0)
+
+         self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
+
+         self.conv_up3 = convrelu(1024 + 2048, 1024, 3, 1)
+         self.conv_up2 = convrelu(512 + 1024, 512, 3, 1)
+         self.conv_up1 = convrelu(256 + 512, 256, 3, 1)
+         self.conv_up0 = convrelu(64 + 256, 128, 3, 1)
+         # self.conv_up1 = convrelu(512, 256, 3, 1)
+         # self.conv_up0 = convrelu(256, 128, 3, 1)
+
+         self.conv_original_size0 = convrelu(3, 64, 3, 1)
+         self.conv_original_size1 = convrelu(64, 64, 3, 1)
+         self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)
+         # self.conv_last = nn.Conv2d(128, n_class, 1)
+         self.conv_last = nn.Conv2d(64, n_class, 1)
+         if out_dim:
+             self.conv_out = nn.Conv2d(64, out_dim, 1)
+             # self.conv_out = nn.Conv2d(128, out_dim, 1)
+
+         # return_layers = {"layer2": "0", "layer3": "1", "layer4": "2"}
+         self.strides = [8, 16, 32]
+         self.num_channels = [512, 1024, 2048]
+
+     def forward(self, inputs):
+         x_original = self.conv_original_size0(inputs)
+         x_original = self.conv_original_size1(x_original)
+
+         layer0 = self.layer0(inputs)
+         layer1 = self.layer1(layer0)
+         layer2 = self.layer2(layer1)
+         layer3 = self.layer3(layer2)
+         layer4 = self.layer4(layer3)
+
+         # layer4 = self.layer4_1x1(layer4)
+         x = self.upsample(layer4)
+         # layer3 = self.layer3_1x1(layer3)
+         x = torch.cat([x, layer3], dim=1)
+         x = self.conv_up3(x)
+         layer3_up = x
+
+         x = self.upsample(x)
+         # layer2 = self.layer2_1x1(layer2)
+         x = torch.cat([x, layer2], dim=1)
+         x = self.conv_up2(x)
+         layer2_up = x
+
+         x = self.upsample(x)
+         # layer1 = self.layer1_1x1(layer1)
+         x = torch.cat([x, layer1], dim=1)
+         x = self.conv_up1(x)
+
+         x = self.upsample(x)
+         # layer0 = self.layer0_1x1(layer0)
+         x = torch.cat([x, layer0], dim=1)
+         x = self.conv_up0(x)
+
+         x = self.upsample(x)
+         x = torch.cat([x, x_original], dim=1)
+         x = self.conv_original_size2(x)
+
+         out = self.conv_last(x)
+         out = out.sigmoid().squeeze(1)
+
+         # xs = {"0": layer2, "1": layer3, "2": layer4}
+         xs = {"0": layer2_up, "1": layer3_up, "2": layer4}
+         mask = torch.zeros(inputs.shape)[:, 0, :, :].to(layer4.device)
+         # ms_feats = self.ms_feat(xs, mask)
+
+         if self.return_ms_feat:
+             if self.out_dim:
+                 out_feat = self.conv_out(x)
+                 out_feat = out_feat.permute(0, 2, 3, 1)
+                 return xs, mask, out, out_feat
+             else:
+                 return xs, mask, out
+         else:
+             return out
+
+     def train(self, mode=True):
+         # Override train so that the training mode is set as we want
+         nn.Module.train(self, mode)
+         if mode:
+             # fix all bn layers
+             def set_bn_eval(m):
+                 classname = m.__class__.__name__
+                 if classname.find('BatchNorm') != -1:
+                     m.eval()
+
+             self.apply(set_bn_eval)
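
A quick shape check (a sketch; the batch size, the 256x256 input, and running with the repo root on PYTHONPATH are assumptions) confirms the strides and channel counts recorded in the constructors above:

    import torch
    from models.resnet import ResNetBackbone  # assumes repo root on sys.path

    backbone = ResNetBackbone()
    xs, mask, all_feats = backbone(torch.randn(1, 3, 256, 256))
    # "0"/"1"/"2" map to layer2/layer3/layer4: strides 8/16/32,
    # channels 512/1024/2048 for a ResNet-50 trunk
    for key, stride, ch in zip(["0", "1", "2"], backbone.strides, backbone.num_channels):
        n, c, h, w = xs[key].shape
        assert (c, h, w) == (ch, 256 // stride, 256 // stride)
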
models/stacked_hg.py ADDED
@@ -0,0 +1,246 @@
+ """
+ Hourglass network inserted in the pre-activated Resnet
+ Use lr=0.01 for current version
+ (c) Nan Xue (HAWP)
+ (c) Yichao Zhou (LCNN)
+ (c) YANG, Wei
+ """
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ __all__ = ["HourglassNet", "hg"]
+
+
+ class Bottleneck2D(nn.Module):
+     expansion = 2
+
+     def __init__(self, inplanes, planes, stride=1, downsample=None):
+         super(Bottleneck2D, self).__init__()
+
+         self.bn1 = nn.BatchNorm2d(inplanes)
+         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1)
+         self.bn2 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1)
+         self.bn3 = nn.BatchNorm2d(planes)
+         self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1)
+         self.relu = nn.ReLU(inplace=True)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x):
+         residual = x
+
+         out = self.bn1(x)
+         out = self.relu(out)
+         out = self.conv1(out)
+
+         out = self.bn2(out)
+         out = self.relu(out)
+         out = self.conv2(out)
+
+         out = self.bn3(out)
+         out = self.relu(out)
+         out = self.conv3(out)
+
+         if self.downsample is not None:
+             residual = self.downsample(x)
+
+         out += residual
+
+         return out
+
+
+ class Hourglass(nn.Module):
+     def __init__(self, block, num_blocks, planes, depth):
+         super(Hourglass, self).__init__()
+         self.depth = depth
+         self.block = block
+         self.hg = self._make_hour_glass(block, num_blocks, planes, depth)
+
+     def _make_residual(self, block, num_blocks, planes):
+         layers = []
+         for i in range(0, num_blocks):
+             layers.append(block(planes * block.expansion, planes))
+         return nn.Sequential(*layers)
+
+     def _make_hour_glass(self, block, num_blocks, planes, depth):
+         hg = []
+         for i in range(depth):
+             res = []
+             for j in range(3):
+                 res.append(self._make_residual(block, num_blocks, planes))
+             if i == 0:
+                 res.append(self._make_residual(block, num_blocks, planes))
+             hg.append(nn.ModuleList(res))
+         return nn.ModuleList(hg)
+
+     def _hour_glass_forward(self, n, x):
+         up1 = self.hg[n - 1][0](x)
+         low1 = F.max_pool2d(x, 2, stride=2)
+         low1 = self.hg[n - 1][1](low1)
+
+         if n > 1:
+             low2 = self._hour_glass_forward(n - 1, low1)
+         else:
+             low2 = self.hg[n - 1][3](low1)
+         low3 = self.hg[n - 1][2](low2)
+         up2 = F.interpolate(low3, scale_factor=2)
+         out = up1 + up2
+         return out
+
+     def forward(self, x):
+         return self._hour_glass_forward(self.depth, x)
+
+
+ class HourglassNet(nn.Module):
+     """Hourglass model from Newell et al ECCV 2016"""
+
+     def __init__(self, inplanes, num_feats, block, head, depth, num_stacks, num_blocks, num_classes):
+         super(HourglassNet, self).__init__()
+
+         self.inplanes = inplanes
+         self.num_feats = num_feats
+         self.num_stacks = num_stacks
+         self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3)
+         self.bn1 = nn.BatchNorm2d(self.inplanes)
+         self.relu = nn.ReLU(inplace=True)
+         self.layer1 = self._make_residual(block, self.inplanes, 1)
+         self.layer2 = self._make_residual(block, self.inplanes, 1)
+         self.layer3 = self._make_residual(block, self.num_feats, 1)
+         self.maxpool = nn.MaxPool2d(2, stride=2)
+
+         # build hourglass modules
+         ch = self.num_feats * block.expansion
+         # vpts = []
+         hg, res, fc, score, fc_, score_ = [], [], [], [], [], []
+         for i in range(num_stacks):
+             hg.append(Hourglass(block, num_blocks, self.num_feats, depth))
+             res.append(self._make_residual(block, self.num_feats, num_blocks))
+             fc.append(self._make_fc(ch, ch))
+             score.append(head(ch, num_classes))
+             # vpts.append(VptsHead(ch))
+             # vpts.append(nn.Linear(ch, 9))
+             # score.append(nn.Conv2d(ch, num_classes, kernel_size=1))
+             # score[i].bias.data[0] += 4.6
+             # score[i].bias.data[2] += 4.6
+             if i < num_stacks - 1:
+                 fc_.append(nn.Conv2d(ch, ch, kernel_size=1))
+                 score_.append(nn.Conv2d(num_classes, ch, kernel_size=1))
+         self.hg = nn.ModuleList(hg)
+         self.res = nn.ModuleList(res)
+         self.fc = nn.ModuleList(fc)
+         self.score = nn.ModuleList(score)
+         # self.vpts = nn.ModuleList(vpts)
+         self.fc_ = nn.ModuleList(fc_)
+         self.score_ = nn.ModuleList(score_)
+
+     def _make_residual(self, block, planes, blocks, stride=1):
+         downsample = None
+         if stride != 1 or self.inplanes != planes * block.expansion:
+             downsample = nn.Sequential(
+                 nn.Conv2d(
+                     self.inplanes,
+                     planes * block.expansion,
+                     kernel_size=1,
+                     stride=stride,
+                 )
+             )
+
+         layers = []
+         layers.append(block(self.inplanes, planes, stride, downsample))
+         self.inplanes = planes * block.expansion
+         for i in range(1, blocks):
+             layers.append(block(self.inplanes, planes))
+
+         return nn.Sequential(*layers)
+
+     def _make_fc(self, inplanes, outplanes):
+         bn = nn.BatchNorm2d(inplanes)
+         conv = nn.Conv2d(inplanes, outplanes, kernel_size=1)
+         return nn.Sequential(conv, bn, self.relu)
+
+     def forward(self, x):
+         out = []
+         x = self.conv1(x)
+         x = self.bn1(x)
+         x = self.relu(x)
+
+         x = self.layer1(x)
+         x = self.maxpool(x)
+         x = self.layer2(x)
+         x = self.layer3(x)
+
+         for i in range(self.num_stacks):
+             y = self.hg[i](x)
+             y = self.res[i](y)
+             y = self.fc[i](y)
+             score = self.score[i](y)
+             out.append(score)
+
+             if i < self.num_stacks - 1:
+                 fc_ = self.fc_[i](y)
+                 score_ = self.score_[i](score)
+                 x = x + fc_ + score_
+
+         return out[::-1], y
+
+     def train(self, mode=True):
+         # Override train so that the training mode is set as we want
+         nn.Module.train(self, mode)
+         if mode:
+             # fix all bn layers
+             def set_bn_eval(m):
+                 classname = m.__class__.__name__
+                 if classname.find('BatchNorm') != -1:
+                     m.eval()
+
+             self.apply(set_bn_eval)
+
+
+ class MultitaskHead(nn.Module):
+     def __init__(self, input_channels, num_class, head_size):
+         super(MultitaskHead, self).__init__()
+
+         m = int(input_channels / 4)
+         heads = []
+         for output_channels in sum(head_size, []):
+             heads.append(
+                 nn.Sequential(
+                     nn.Conv2d(input_channels, m, kernel_size=3, padding=1),
+                     nn.ReLU(inplace=True),
+                     nn.Conv2d(m, output_channels, kernel_size=1),
+                 )
+             )
+         self.heads = nn.ModuleList(heads)
+         assert num_class == sum(sum(head_size, []))
+
+     def forward(self, x):
+         return torch.cat([head(x) for head in self.heads], dim=1)
+
+
+ def build_hg():
+     inplanes = 64
+     num_feats = 256 // 2
+     depth = 4
+     num_stacks = 2
+     num_blocks = 1
+     head_size = [[2], [2]]
+
+     out_feature_channels = 256
+
+     num_class = sum(sum(head_size, []))
+     model = HourglassNet(
+         block=Bottleneck2D,
+         inplanes=inplanes,
+         num_feats=num_feats,
+         depth=depth,
+         head=lambda c_in, c_out: MultitaskHead(c_in, c_out, head_size=head_size),
+         num_stacks=num_stacks,
+         num_blocks=num_blocks,
+         num_classes=num_class)
+
+     model.out_feature_channels = out_feature_channels
+
+     return model
+
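
As a sanity check of build_hg() (a sketch; the 256x256 input is an arbitrary choice): the stem's stride-2 conv plus the max-pool give 1/4-resolution outputs, each stack emits a 4-channel score map (head_size [[2], [2]]), and the returned feature map has 256 channels (num_feats * block expansion):

    import torch
    from models.stacked_hg import build_hg  # assumes repo root on sys.path

    model = build_hg()
    scores, feats = model(torch.randn(1, 3, 256, 256))
    assert len(scores) == 2                    # one score map per stack, last stack first
    assert scores[0].shape == (1, 4, 64, 64)   # num_class = sum(sum(head_size, []))
    assert feats.shape == (1, 256, 64, 64)     # out_feature_channels
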
predict.py ADDED
@@ -0,0 +1,33 @@
+ '''
+ Author: [egrt]
+ Date: 2022-08-23 13:21:27
+ LastEditors: [egrt]
+ LastEditTime: 2022-08-23 13:45:21
+ Description:
+ '''
+ #--------------------------------------------------------------#
+ #   Run prediction on a single image; the result is saved
+ #   under the project root (by default as
+ #   results/predict_out/predict_srgan.png).
+ #--------------------------------------------------------------#
+ from PIL import Image
+
+ from HEAT import HEAT
+
+ if __name__ == "__main__":
+     heat = HEAT()
+     #----------------------------#
+     #   Save path for the single output image
+     #----------------------------#
+     save_path = "assets/test_out.jpg"
+
+     while True:
+         img = input('Input image filename:')
+         try:
+             image = Image.open(img)
+         except:
+             print('Open Error! Try again!')
+             continue
+         else:
+             r_image = heat.detect_one_image(image)
+             r_image.save(save_path)
+             r_image.show()
qualitative_outdoor/generate_html.py ADDED
@@ -0,0 +1,64 @@
+ import os
+ import os.path as osp
+ import numpy as np
+
+
+ head = '''
+ <html>
+ <head>
+ <style>
+ td {text-align: center;}
+ </style>
+ </head>
+ <p>
+ </p>
+ <br>
+ <table border="1">
+ '''
+
+ end = '''
+ </table>
+ <br>
+ </html>
+ '''
+
+ def writeHTML(out_path, results_dirs):
+     f = open(out_path, 'w')
+     f.write(head + '\n')
+     f.write('<tr>'
+             '<td style="background-color:#FFFFFF"> ID </td> '
+             '<td style="background-color:#FFFFFF"> Input </td> '
+             '<td style="background-color:#FFFFFF"> ConvMPN </td> '
+             '<td style="background-color:#FFFFFF"> Exp-cls </td> '
+             '<td style="background-color:#FFFFFF"> HAWP </td> '
+             '<td style="background-color:#FFFFFF"> LETR </td> '
+             '<td style="background-color:#FFFFFF"> HEAT (Ours) </td> '
+             '<td style="background-color:#FFFFFF"> G.T. </td> '
+             '</tr>')
+
+     fileids_path = '../data/cities_dataset/valid_list.txt'
+     img_base = '../data/cities_dataset/rgb'
+     with open(fileids_path) as ff:
+         file_ids = ff.readlines()
+     file_ids = file_ids[50:]
+     file_ids = [file_id.strip() for file_id in file_ids]
+     permuted_ids = np.random.permutation(file_ids)
+     file_ids = permuted_ids[:100]
+
+     for file_id in file_ids:
+         row_str = '<tr>'
+         row_str += '<td> {} </td>'.format(file_id)
+         row_str += '<td> <img src="{}" width="180"> </td>'.format(os.path.join(img_base, file_id + '.jpg'))
+         for dir_idx, result_dir in enumerate(results_dirs):
+             pred_filepath = osp.join(result_dir, '{}.png'.format(file_id))
+             row_str += '<td> <img src="{}" width="180"> </td>'.format(pred_filepath)
+         row_str += '</tr>'
+         f.write(row_str + '\n')
+
+     f.write(end + '\n')
+
+
+ if __name__ == '__main__':
+     results_dirs = ['svg_images_256/convmpn', 'svg_images_256/exp_cls', 'svg_images_256/hawp', 'svg_images_256/letr', 'svg_images_256/heat', 'svg_images_256/gt']
+
+     writeHTML(out_path='./outdoor_qual.html', results_dirs=results_dirs)
qualitative_outdoor/plot_utils.py ADDED
@@ -0,0 +1,43 @@
1
+ import cv2
2
+ import svgwrite
3
+ import colorsys
4
+
5
+
6
+ def plot_preds(image, corners, edges):
7
+ for line in edges:
8
+ cv2.line(image, tuple(line[:2]), tuple(line[2:]), (255, 255, 0), 2)
9
+ for c in corners:
10
+ cv2.circle(image, (int(c[0]), int(c[1])), 3, (0, 0, 255), -1)
11
+ return image
12
+
13
+
14
+ def random_colors(N, bright=True, same=False, colors=None):
15
+ brightness = 1.0 if bright else 0.7
16
+ if colors is None or same:
17
+ if same:
18
+ hsv = [(0, 1, brightness) for i in range(N)]
19
+ else:
20
+ hsv = [(i / N, 1, brightness) for i in range(N)]
21
+ else:
22
+ hsv = [(colors[i], 1, brightness) for i in range(N)]
23
+ colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
24
+ return colors
25
+
26
+
27
+ def svg_generate(image_link, corners, edges, name, size=512):
28
+ dwg = svgwrite.Drawing(name + '.svg', size=('{}'.format(size), '{}'.format(size)))
29
+ shapes = dwg.add(dwg.g(id='shape', fill='black'))
30
+ # colors = random_colors(len(edges), same=True)
31
+ shapes.add(dwg.image(href=image_link, size=(size, size)))
32
+
33
+ scale = size / 256
34
+ for i, edge in enumerate(edges):
35
+ x = edge[:2] * scale
36
+ y = edge[2:] * scale
37
+ shapes.add(dwg.line((int(x[0]), int(x[1])), (int(y[0]), int(y[1])),
38
+ stroke="#EE6507", stroke_width=3*scale, opacity=0.7))
39
+
40
+ for i, corner in enumerate(corners):
41
+ shapes.add(dwg.circle((int(corner[0] * scale), int(corner[1] * scale)), r=4*scale,
42
+ stroke='green', fill='white', stroke_width=2*scale, opacity=0.8))
43
+ return dwg
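`svg_generate` returns an `svgwrite.Drawing`; rasterizing it goes through a temporary .svg file with cairosvg, exactly as the visualization scripts below do. A minimal usage sketch with made-up 256-pixel coordinates:

    import numpy as np
    import cairosvg
    from plot_utils import svg_generate

    corners = np.array([[60, 60], [200, 60], [200, 200]])       # toy corner coordinates
    edges = np.array([[60, 60, 200, 60], [200, 60, 200, 200]])  # rows of (x1, y1, x2, y2)
    dwg = svg_generate('input.jpg', corners, edges, name='temp', size=256)
    dwg.saveas('temp.svg')                                      # write the SVG to disk
    cairosvg.svg2png(url='temp.svg', write_to='preview.png')    # rasterize to PNG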
qualitative_outdoor/visualize_gt.py ADDED
@@ -0,0 +1,46 @@
1
+ import os
2
+ import json
3
+ import cv2
4
+ import numpy as np
5
+ from plot_utils import plot_preds, svg_generate
6
+ import cairosvg
7
+
8
+ image_base = '../data/cities_dataset/rgb/'
9
+ annot_base = '../data/cities_dataset/annot/'
10
+ data_filename = '../data/cities_dataset/valid_list.txt'
11
+ with open(data_filename) as f:
12
+ filenames = f.readlines()
13
+
14
+ filenames = filenames[50:]
15
+ filenames = [filename.strip() for filename in filenames]
16
+
17
+
18
+ for filename in filenames:
19
+ image_path = os.path.join(image_base, filename + '.jpg')
20
+ # image = cv2.imread(image_path)
21
+ annot_path = os.path.join(annot_base, filename + '.npy')
22
+
23
+ annot = np.load(annot_path, allow_pickle=True, encoding='latin1').tolist()
24
+ corners = np.array(list(annot.keys())).astype(int)
25
+
26
+ edges = set()
27
+ for c, others in annot.items():
28
+ for other_c in others:
29
+ edge = (c[0], c[1], other_c[0], other_c[1])
30
+ edge_2 = (other_c[0], other_c[1], c[0], c[1])
31
+ if edge not in edges and edge_2 not in edges:
32
+ edges.add(edge)
33
+
34
+ edges = np.array(list(edges)).astype(int)
35
+
36
+ # image = plot_preds(image, corners, edges)
37
+ # out_path = os.path.join(out_base, filename + '.png')
38
+ # cv2.imwrite(out_path, image)
39
+
40
+ svg = svg_generate(image_path, corners, edges, name='temp', size=256)
41
+ os.makedirs('./svg_results', exist_ok=True)
+ os.makedirs('./svg_images_256/gt', exist_ok=True)
+ svg_path = './svg_results/' + 'tmp.svg'
42
+ svg.saveas(svg_path)
43
+ svg_img_path = './svg_images_256/gt/' + '{}.png'.format(filename)
44
+ cairosvg.svg2png(url=svg_path, write_to=svg_img_path)
45
+
46
+
qualitative_outdoor/visualize_npy.py ADDED
@@ -0,0 +1,46 @@
1
+ import os
2
+ import json
3
+ import cv2
4
+ import numpy as np
5
+ import cairosvg
6
+ from plot_utils import plot_preds, svg_generate
7
+
8
+ image_base = '../../data/outdoor/cities_dataset/rgb/'
9
+ svg_base = './svg_results'
10
+
11
+ if not os.path.exists(svg_base):
12
+ os.makedirs(svg_base)
13
+
14
+ data_filename = '../data/outdoor/cities_dataset/valid_list.txt'
15
+ with open(data_filename) as f:
16
+ filenames = f.readlines()
17
+
18
+ filenames = filenames[50:] # according to previous works, the testing samples are the last 350 samples of the val split
19
+ filenames = [filename.strip() for filename in filenames]
20
+ idx_to_filename = {idx: filename for idx, filename in enumerate(filenames)}
21
+
22
+ method_name = 'heat'
23
+ results_base = '../results/npy_outdoor_test_256/'
24
+
25
+ svg_method_base = os.path.join(svg_base, method_name)
26
+ if not os.path.exists(svg_method_base):
27
+ os.makedirs(svg_method_base)
28
+
29
+ for result_filename in sorted(os.listdir(results_base)):
30
+ file_idx = int(result_filename[:-12])  # strip the 12-char suffix (likely '_results.npy')
31
+ filename = idx_to_filename[file_idx]
32
+
33
+ image_path = os.path.join(image_base, filename + '.jpg')
34
+
35
+ results_path = os.path.join(results_base, result_filename)
36
+ results = np.load(results_path, allow_pickle=True).tolist()
37
+ corners = results['corners'].astype(int)
38
+ edge_ids = results['edges']
39
+ edges = corners[edge_ids].reshape(edge_ids.shape[0], -1)
40
+
41
+ svg = svg_generate(image_path, corners, edges, name='temp', size=256)
42
+ svg_path = os.path.join(svg_base, 'tmp.svg')
43
+ svg.saveas(svg_path) # save the svg file temporarily
44
+
45
+ svg_img_path = os.path.join(svg_method_base, '{}.png'.format(filename))
46
+ cairosvg.svg2png(url=svg_path, write_to=svg_img_path)
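The `*.npy` result files consumed above are pickled dicts with a `corners` array (N x 2 coordinates) and an `edges` array (M x 2 indices into `corners`). A minimal writer sketch for a compatible file — the `_results.npy` suffix is an inference from the 12-character slice above:

    import numpy as np

    corners = np.array([[60.0, 60.0], [200.0, 60.0], [200.0, 200.0]])
    edges = np.array([[0, 1], [1, 2]])  # each row indexes two corners
    np.save('../results/npy_outdoor_test_256/0_results.npy',
            {'corners': corners, 'edges': edges})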
requirements.txt ADDED
@@ -0,0 +1,27 @@
1
+ Cython==0.29.22
2
+ defusedxml==0.6.0
3
+ einops==0.4.1
4
+ future==0.18.2
5
+ imageio==2.16.1
6
+ matplotlib==3.3.4
7
+ MultiScaleDeformableAttention==1.0  # built locally from models/ops, not a standard PyPI package
8
+ numpy==1.20.1
9
+ opencv-python==4.4.0.44
10
+ packaging==20.9
11
+ Pillow==9.0.1
12
+ prometheus-client==0.9.0
13
+ prompt-toolkit==3.0.16
14
+ ptyprocess==0.7.0
15
+ pycparser==2.20
16
+ Pygments==2.8.0
17
+ python-dateutil==2.8.1
18
+ scikit-image==0.19.2
19
+ scikit-learn==1.0
20
+ scipy==1.6.1
21
+ six==1.15.0
22
+ torch==1.5.1
23
+ torchvision==0.6.1
24
+ cairosvg==2.5.2
25
+ svgwrite==1.4.2
26
+ shapely==1.8.2
27
+ gradio==2.5.3
s3d_floorplan_eval/DataRW/DataRW.py ADDED
@@ -0,0 +1,4 @@
1
+
2
+ class DataRW:
3
+ def __init__(self, options):
4
+ pass
s3d_floorplan_eval/DataRW/S3DRW.py ADDED
@@ -0,0 +1,142 @@
1
+ import numpy as np
2
+ import cv2
3
+ import torch
4
+ import os
5
+ import time
6
+
7
+ from DataRW.DataRW import DataRW
8
+ from S3DLoader.S3DLoader import S3DLoader
9
+
10
+ class S3DRW(DataRW):
11
+ def __init__(self, options):
12
+ """
13
+ Class for accessing Structured3D (S3D) dataset related data
14
+
15
+ :param options:
16
+ """
17
+ # initialize the base class variables
18
+ super(S3DRW, self).__init__(options)
19
+
20
+ self.options = options
21
+
22
+ self.dataset_path = options.dataset_path
23
+ self.scene_id = options.scene_id
24
+
25
+ self.mcts_path = options.mcts_path
26
+ self.creation_time = int(time.time())
27
+
28
+ self.device = torch.device("cpu")
29
+
30
+ # mode = "train"
31
+ # mode = "online_eval"
32
+ mode = "test"
33
+ # For validation only
34
+ # self.loader = S3DLoader(options, 'online_eval').dataset
35
+ self.loader = S3DLoader(options, mode).dataset
36
+
37
+ # gt_sample = iter(floornet_loader.dataset[int(self.scene_id)])
38
+ # self.gt_sample = floornet_loader.load_sample(list(iter(floornet_loader.dataset))[int(self.scene_id)])
39
+
40
+ if mode == "online_eval":
41
+ scene_ind = int(self.scene_id[6:]) - 3000
42
+ elif mode == "test":
43
+ scene_ind = int(self.scene_id[6:]) - 3250
44
+ elif mode == "train":
45
+ scene_ind = int(self.scene_id[6:])
46
+ else:
47
+ raise ValueError("unknown mode: %s" % mode)
48
+
49
+ # print(len(list(iter(self.s3d_loader.data))))
50
+ self.gt_sample = gt_sample = self.loader[scene_ind]
51
+ self.gt_sample["density_map"] = torch.tensor(self.gt_sample["density_map"][None], device=self.device)
52
+ self.gt_sample["room_map"] = torch.tensor(self.gt_sample["room_map"][None,:,:,None], device=self.device)
53
+ self.gt_sample["wall_map"] = torch.tensor(self.gt_sample["wall_map"][None,:,:,None], device=self.device)
54
+
55
+
56
+ self.density_map = self.gt_sample['density_map'][:,:,:,None]
57
+
58
+ self.h, self.w = self.density_map.shape[1], self.density_map.shape[2]
59
+
60
+ self.generate_input_map_from_props = self.generate_input_dict_from_room_props
61
+
62
+ def get_gt_solution(self):
63
+ """
64
+ Read top-view density map of the scene
65
+
66
+ :return:
67
+ """
68
+ img_path = os.path.join(self.dataset_path, str(self.scene_id) + "_density.png")
69
+ density_map = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_ANYCOLOR)[:,:, 0][None,:,:,None]
70
+
71
+ density_map = torch.from_numpy(density_map).to(self.device)
72
+
73
+ dm_min = torch.min(density_map)
74
+ dm_max = torch.max(density_map)
75
+
76
+ density_map = (density_map - dm_min) / (dm_max - dm_min)
77
+
78
+ return density_map.type(torch.cuda.FloatTensor)
79
+
80
+ def polygonize_mask(self, pm, return_mask=True):
81
+ pm_np = pm.cpu().numpy()
82
+
83
+ room_mask = 255 * (pm_np == 1)
84
+ room_mask = room_mask.astype(np.uint8)
85
+ room_mask_inv = 255 - room_mask
86
+
87
+ ret, thresh = cv2.threshold(room_mask_inv, 250, 255, cv2.THRESH_BINARY_INV)
88
+
89
+ contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
90
+
91
+ cnt = contours[0]
92
+ max_area = cv2.contourArea(cnt)
93
+
94
+ for cont in contours:
95
+ if cv2.contourArea(cont) > max_area:
96
+ cnt = cont
97
+ max_area = cv2.contourArea(cont)
98
+
99
+ # define main island contour approx. and hull
100
+ perimeter = cv2.arcLength(cnt, True)
101
+ epsilon = 0.01 * cv2.arcLength(cnt, True)
102
+ approx = cv2.approxPolyDP(cnt, epsilon, True)
103
+
104
+ # approx = np.concatenate([approx, approx[0][None]], axis=0)
105
+ approx = approx.astype(np.int32).reshape((1, -1, 2))
106
+
107
+ if return_mask:
108
+ room_filled_map = np.zeros((self.h, self.w))
109
+ cv2.fillPoly(room_filled_map, approx, color=1.)
110
+
111
+ room_filled_map = torch.tensor(room_filled_map[:,:], dtype=torch.float32, device=self.device)
112
+
113
+ return room_filled_map
114
+ else:
115
+ approx_tensor = torch.tensor(approx, device=self.device)
116
+ return approx_tensor
117
+
118
+ def generate_input_dict_from_room_props(self, room_prop_list, score_function, use_thresh=False):
119
+ """
120
+
121
+ :param room_prop_list:
122
+ :type room_prop_list: list of FloorPlanRoomProp
123
+ :param score_function:
124
+ :return:
125
+ """
126
+
127
+ if score_function == "room_maskrcnn_iou":
128
+ inputs = self.generate_input_dict_for_room_maskrcnn_iou(room_prop_list)
129
+ elif score_function == "room_iou":
130
+ inputs = self.generate_input_dict_for_room_iou(room_prop_list, use_thresh=use_thresh)
131
+ else:
132
+ assert "generate_input_dict_from_room_props for %s not implemented" % score_function
133
+
134
+ return inputs
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
s3d_floorplan_eval/DataRW/wrong_annotatios.py ADDED
@@ -0,0 +1 @@
1
+ wrong_s3d_annotations_list = [3261, 3271, 3276, 3296, 3342, 3387, 3398, 3466, 3496]
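A list like this is typically consumed by the evaluation loop to skip scenes with known-bad ground truth. A hypothetical sketch, assuming scene ids follow the `scene_0XXXX` pattern and the test-split offsets used by S3DRW above:

    from DataRW.wrong_annotatios import wrong_s3d_annotations_list

    for scene_num in range(3250, 3500):                # test-split scene numbers (assumed)
        if scene_num in wrong_s3d_annotations_list:
            continue                                   # skip scenes with wrong annotations
        scene_id = 'scene_%05d' % scene_num            # e.g. 'scene_03261'
        # ... build S3DRW(options) with this scene_id and evaluate ...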
s3d_floorplan_eval/Evaluator/Evaluator.py ADDED
@@ -0,0 +1,457 @@
1
+ import os
2
+ import torch
3
+ import matplotlib.pyplot as plt
4
+ import cv2
5
+ import numpy as np
6
+ from scipy.spatial import Delaunay
8
+ import shapely
9
+ from shapely.geometry import Polygon, MultiPolygon, LineString, MultiLineString
10
+
11
+ corner_metric_thresh = 10
12
+ angle_metric_thresh = 5
13
+
14
+
15
+
16
+ # colormap_255 = [[i, i, i] for i in range(40)]
17
+
18
+ class Evaluator():
19
+ def __init__(self, data_rw, options):
20
+ self.data_rw = data_rw
21
+ self.options = options
22
+
23
+ self.device = torch.device("cuda")
24
+
25
+ def polygonize_mask(self, mask, degree, return_mask=True):
26
+ h, w = mask.shape[0], mask.shape[1]
28
+
29
+ room_mask = 255 * (mask == 1)
30
+ room_mask = room_mask.astype(np.uint8)
31
+ room_mask_inv = 255 - room_mask
32
+
33
+ ret, thresh = cv2.threshold(room_mask_inv, 250, 255, cv2.THRESH_BINARY_INV)
34
+
35
+ contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
36
+
37
+ cnt = contours[0]
38
+ max_area = cv2.contourArea(cnt)
39
+
40
+ for cont in contours:
41
+ if cv2.contourArea(cont) > max_area:
42
+ cnt = cont
43
+ max_area = cv2.contourArea(cont)
44
+
45
+ perimeter = cv2.arcLength(cnt, True)
46
+ # epsilon = 0.01 * cv2.arcLength(cnt, True)
47
+ epsilon = degree * cv2.arcLength(cnt, True)
48
+ approx = cv2.approxPolyDP(cnt, epsilon, True)
49
+
50
+ # approx = np.concatenate([approx, approx[0][None]], axis=0)
51
+ approx = approx.astype(np.int32).reshape((-1, 2))
52
+
53
+ # approx_tensor = torch.tensor(approx, device=self.device)
54
+
55
+ # return approx_tensor
56
+ if return_mask:
57
+ room_filled_map = np.zeros((h, w))
58
+ cv2.fillPoly(room_filled_map, [approx], color=1.)
59
+
60
+ return approx, room_filled_map
61
+ else:
62
+ return approx
63
+
64
+ def print_res_str_for_latex(self, quant_result_dict):
65
+
66
+ str_fields = ""
67
+ str_values = ""
68
+
69
+ avg_value_prec = 0
70
+ avg_value_rec = 0
71
+ for k_ind, k in enumerate(quant_result_dict.keys()):
72
+ str_fields += " & " + k
73
+ str_values += " & %.2f " % quant_result_dict[k]
74
+
75
+ if k_ind % 2 == 0:
76
+ avg_value_prec += quant_result_dict[k] / 3  # three prec/rec pairs: room, corner, angle
77
+ else:
78
+ avg_value_rec += quant_result_dict[k] / 3
79
+
80
+ str_fields += "tm_prec & tm_rec"
81
+
82
+ str_values += " & %.2f " % avg_value_prec
83
+ str_values += " & %.2f " % avg_value_rec
84
+
85
+ str_fields += " \\\\"
86
+ str_values += " \\\\"
87
+
88
+ print(str_fields)
89
+ print(str_values)
90
+
91
+ def calc_gradient(self, room_map):
92
+ grad_x = np.abs(room_map[:, 1:] - room_map[:, :-1])
93
+ grad_y = np.abs(room_map[1:] - room_map[:-1])
94
+
95
+ grad_xy = np.zeros_like(room_map)
96
+ grad_xy[1:] = grad_y
97
+ grad_xy[:, 1:] = np.maximum(grad_x, grad_xy[:,1:])
98
+
99
+ plt.figure()
100
+ plt.axis("off")
101
+ plt.imshow(grad_xy, cmap="gray")
102
+ # plt.show()
103
+ plt.savefig("grad.png", bbox_inches='tight')
104
+
105
+ plt.figure()
106
+ plt.axis("off")
107
+ plt.imshow(room_map, cmap="gray")
108
+ # plt.show()
109
+ plt.savefig("joint_mask.png", bbox_inches='tight')
110
+ assert False  # stops execution after dumping the debug figures
111
+
112
+ def evaluate_scene(self, room_polys, show=False, name="ours", dataset_type="s3d"):
113
+
114
+ with torch.no_grad():
115
+ joint_room_map = np.zeros((self.options.height, self.options.width))
116
+
117
+ edge_map = np.zeros_like(joint_room_map)
118
+ room_filled_map = np.ones([joint_room_map.shape[0], joint_room_map.shape[1], 3])
119
+
120
+ density_map = self.data_rw.density_map.cpu().numpy()[0]
121
+ img_size = (density_map.shape[0], density_map.shape[0])
122
+
123
+ for room_ind, poly in enumerate(room_polys):
124
+ cv2.polylines(edge_map, [poly], isClosed=True, color=1.)
125
+ cv2.fillPoly(joint_room_map, [poly], color=1.)
126
+
127
+ joint_room_map_vis = np.ones([joint_room_map.shape[0], joint_room_map.shape[1], 3])
128
+
129
+ # Ground Truth
130
+
131
+ gt_polys_list = self.data_rw.gt_sample["polygons_list"]
132
+ gt_polys_list = [np.concatenate([poly, poly[None, 0]]) for poly in gt_polys_list]
133
+
134
+ ignore_mask_region = self.data_rw.gt_sample["wall_map"].cpu().numpy()[0, :, :, 0]
135
+
136
+ img_size = (joint_room_map.shape[0], joint_room_map.shape[1])
137
+ quant_result_dict = self.get_quantitative(gt_polys_list, ignore_mask_region, room_polys, img_size=img_size, dataset_type=dataset_type)
138
+
139
+ return quant_result_dict
140
+
141
+ def get_quantitative(self, gt_polys, ignore_mask_region, pred_polys=None, masks_list=None, img_size=(256, 256), dataset_type="s3d"):
142
+ def get_room_metric():
143
+ pred_overlaps = [False] * len(pred_room_map_list)
144
+
145
+ for pred_ind1 in range(len(pred_room_map_list) - 1):
146
+ pred_map1 = pred_room_map_list[pred_ind1]
147
+
148
+ for pred_ind2 in range(pred_ind1 + 1, len(pred_room_map_list)):
149
+ pred_map2 = pred_room_map_list[pred_ind2]
150
+
151
+ if dataset_type == "s3d":
152
+ kernel = np.ones((5, 5), np.uint8)
153
+ else:
154
+ kernel = np.ones((3, 3), np.uint8)
155
+
156
+ # todo: for our method, the rooms share corners and edges, need to check here
157
+ pred_map1_er = cv2.erode(pred_map1, kernel)
158
+ pred_map2_er = cv2.erode(pred_map2, kernel)
159
+
160
+ intersection = (pred_map1_er + pred_map2_er) == 2
161
+ # intersection = (pred_map1 + pred_map2) == 2
162
+
163
+ intersection_area = np.sum(intersection)
164
+
165
+ if intersection_area >= 1:
166
+ pred_overlaps[pred_ind1] = True
167
+ pred_overlaps[pred_ind2] = True
168
+
169
+ # import pdb; pdb.set_trace()
170
+ room_metric = [bool((1 - pred_overlaps[ind]) * pred2gt_exists[ind]) for ind in range(len(pred_polys))]
171
+
172
+ return room_metric
173
+
174
+ def get_corner_metric():
175
+
176
+ room_corners_metric = []
177
+ for pred_poly_ind, gt_poly_ind in enumerate(pred2gt_indices):
178
+ p_poly = pred_polys[pred_poly_ind][:-1] # Last vertex = First vertex
179
+
180
+ p_poly_corner_metrics = [False] * p_poly.shape[0]
181
+ if not room_metric[pred_poly_ind]:
182
+ room_corners_metric += p_poly_corner_metrics
183
+ continue
184
+
185
+ gt_poly = gt_polys[gt_poly_ind][:-1]
186
+
187
+ # for v in p_poly:
188
+ # v_dists = np.linalg.norm(v[None,:] - gt_poly, axis=1, ord=2)
189
+ # v_min_dist = np.min(v_dists)
190
+ #
191
+ # v_tp = v_min_dist <= 10
192
+ # room_corners_metric.append(v_tp)
193
+
194
+ for v in gt_poly:
195
+ v_dists = np.linalg.norm(v[None,:] - p_poly, axis=1, ord=2)
196
+ v_min_dist_ind = np.argmin(v_dists)
197
+ v_min_dist = v_dists[v_min_dist_ind]
198
+
199
+ if not p_poly_corner_metrics[v_min_dist_ind]:
200
+ v_tp = v_min_dist <= corner_metric_thresh
201
+ p_poly_corner_metrics[v_min_dist_ind] = v_tp
202
+
203
+ room_corners_metric += p_poly_corner_metrics
204
+
205
+ return room_corners_metric
206
+
207
+ def get_angle_metric():
208
+
209
+ def get_line_vector(p1, p2):
210
+ p1 = np.concatenate((p1, np.array([1])))
211
+ p2 = np.concatenate((p2, np.array([1])))
212
+
213
+ line_vector = -np.cross(p1, p2)
214
+
215
+ return line_vector
216
+
217
+ def get_poly_orientation(my_poly):
218
+ angles_sum = 0
219
+ for v_ind, _ in enumerate(my_poly):
220
+ if v_ind < len(my_poly) - 1:
221
+ v_sides = my_poly[[v_ind - 1, v_ind, v_ind, v_ind + 1], :]
222
+ else:
223
+ v_sides = my_poly[[v_ind - 1, v_ind, v_ind, 0], :]
224
+
225
+ v1_vector = get_line_vector(v_sides[0], v_sides[1])
226
+ v1_vector = v1_vector / (np.linalg.norm(v1_vector, ord=2) + 1e-4)
227
+ v2_vector = get_line_vector(v_sides[2], v_sides[3])
228
+ v2_vector = v2_vector / (np.linalg.norm(v2_vector, ord=2) + 1e-4)
229
+
230
+ orientation = (v_sides[1, 1] - v_sides[0, 1]) * (v_sides[3, 0] - v_sides[1, 0]) - (
231
+ v_sides[3, 1] - v_sides[1, 1]) * (
232
+ v_sides[1, 0] - v_sides[0, 0])
233
+
234
+ v1_vector_2d = v1_vector[:2] / (v1_vector[2] + 1e-4)
235
+ v2_vector_2d = v2_vector[:2] / (v2_vector[2] + 1e-4)
236
+
237
+ v1_vector_2d = v1_vector_2d / (np.linalg.norm(v1_vector_2d, ord=2) + 1e-4)
238
+ v2_vector_2d = v2_vector_2d / (np.linalg.norm(v2_vector_2d, ord=2) + 1e-4)
239
+
240
+ angle_cos = v1_vector_2d.dot(v2_vector_2d)
241
+ angle_cos = np.clip(angle_cos, -1, 1)
242
+
243
+ # G.T. has clockwise orientation, remove minus in the equation
244
+
245
+ angle = np.sign(orientation) * np.abs(np.arccos(angle_cos))
246
+ angle_degree = angle * 180 / np.pi
247
+
248
+ angles_sum += angle_degree
249
+
250
+ return np.sign(angles_sum)
251
+
252
+ def get_angle_v_sides(inp_v_sides, poly_orient):
253
+ v1_vector = get_line_vector(inp_v_sides[0], inp_v_sides[1])
254
+ v1_vector = v1_vector / (np.linalg.norm(v1_vector, ord=2) + 1e-4)
255
+ v2_vector = get_line_vector(inp_v_sides[2], inp_v_sides[3])
256
+ v2_vector = v2_vector / (np.linalg.norm(v2_vector, ord=2) + 1e-4)
257
+
258
+ orientation = (inp_v_sides[1, 1] - inp_v_sides[0, 1]) * (inp_v_sides[3, 0] - inp_v_sides[1, 0]) - (
259
+ inp_v_sides[3, 1] - inp_v_sides[1, 1]) * (
260
+ inp_v_sides[1, 0] - inp_v_sides[0, 0])
261
+
262
+ v1_vector_2d = v1_vector[:2] / (v1_vector[2]+ 1e-4)
263
+ v2_vector_2d = v2_vector[:2] / (v2_vector[2]+ 1e-4)
264
+
265
+ v1_vector_2d = v1_vector_2d / (np.linalg.norm(v1_vector_2d, ord=2) + 1e-4)
266
+ v2_vector_2d = v2_vector_2d / (np.linalg.norm(v2_vector_2d, ord=2) + 1e-4)
267
+
268
+ angle_cos = v1_vector_2d.dot(v2_vector_2d)
269
+ angle_cos = np.clip(angle_cos, -1, 1)
270
+
271
+ angle = poly_orient * np.sign(orientation) * np.arccos(angle_cos)
272
+ angle_degree = angle * 180 / np.pi
273
+
274
+ return angle_degree
275
+
276
+ room_angles_metric = []
277
+ for pred_poly_ind, gt_poly_ind in enumerate(pred2gt_indices):
278
+ p_poly = pred_polys[pred_poly_ind][:-1] # Last vertex = First vertex
279
+
280
+ p_poly_angle_metrics = [False] * p_poly.shape[0]
281
+ if not room_metric[pred_poly_ind]:
282
+ room_angles_metric += p_poly_angle_metrics
283
+ continue
284
+
285
+ gt_poly = gt_polys[gt_poly_ind][:-1]
286
+
287
+ # for v in p_poly:
288
+ # v_dists = np.linalg.norm(v[None,:] - gt_poly, axis=1, ord=2)
289
+ # v_min_dist = np.min(v_dists)
290
+ #
291
+ # v_tp = v_min_dist <= 10
292
+ # room_corners_metric.append(v_tp)
293
+
294
+ gt_poly_orient = get_poly_orientation(gt_poly)
295
+ p_poly_orient = get_poly_orientation(p_poly)
296
+
297
+ for v_gt_ind, v in enumerate(gt_poly):
298
+ v_dists = np.linalg.norm(v[None,:] - p_poly, axis=1, ord=2)
299
+ v_ind = np.argmin(v_dists)
300
+ v_min_dist = v_dists[v_ind]
301
+
302
+ if v_min_dist > corner_metric_thresh:
303
+ # room_angles_metric.append(False)
304
+ continue
305
+
306
+ if v_ind < len(p_poly) - 1:
307
+ v_sides = p_poly[[v_ind - 1, v_ind, v_ind, v_ind + 1], :]
308
+ else:
309
+ v_sides = p_poly[[v_ind - 1, v_ind, v_ind, 0], :]
310
+
311
+ v_sides = v_sides.reshape((4,2))
312
+ pred_angle_degree = get_angle_v_sides(v_sides, p_poly_orient)
313
+
314
+ # Note: replacing some variables with values from the g.t. poly
315
+
316
+ if v_gt_ind < len(gt_poly) - 1:
317
+ v_sides = gt_poly[[v_gt_ind - 1, v_gt_ind, v_gt_ind, v_gt_ind + 1], :]
318
+ else:
319
+ v_sides = gt_poly[[v_gt_ind - 1, v_gt_ind, v_gt_ind, 0], :]
320
+
321
+ v_sides = v_sides.reshape((4, 2))
322
+ gt_angle_degree = get_angle_v_sides(v_sides, gt_poly_orient)
323
+
324
+ angle_metric = np.abs(pred_angle_degree - gt_angle_degree)
325
+
326
+ # room_angles_metric.append(angle_metric < 5)
327
+ p_poly_angle_metrics[v_ind] = angle_metric <= angle_metric_thresh
328
+
329
+ # if angle_metric > 5:
330
+ # print(v_gt_ind, angle_metric)
331
+ # print(pred_angle_degree, gt_angle_degree)
332
+ # input("?")
333
+
334
+
335
+ room_angles_metric += p_poly_angle_metrics
336
+
337
+ for am, cm in zip(room_angles_metric, corner_metric):
338
+ assert not (cm == False and am == True), "cm: %d am: %d" %(cm, am)
339
+
340
+ return room_angles_metric
341
+
342
+ def poly_map_sort_key(x):
343
+ return np.sum(x[1])
344
+
345
+ h, w = img_size
346
+
347
+ gt_room_map_list = []
348
+ for room_ind, poly in enumerate(gt_polys):
349
+ room_map = np.zeros((h, w))
350
+ cv2.fillPoly(room_map, [poly], color=1.)
351
+
352
+ gt_room_map_list.append(room_map)
353
+
354
+ gt_polys_sorted_indcs = [i[0] for i in sorted(enumerate(gt_room_map_list), key=poly_map_sort_key, reverse=True)]
355
+
356
+ gt_polys = [gt_polys[ind] for ind in gt_polys_sorted_indcs]
357
+ gt_room_map_list = [gt_room_map_list[ind] for ind in gt_polys_sorted_indcs]
358
+
359
+ if pred_polys is not None:
360
+ pred_room_map_list = []
361
+ for room_ind, poly in enumerate(pred_polys):
362
+ room_map = np.zeros((h, w))
363
+ cv2.fillPoly(room_map, [poly], color=1.)
364
+
365
+ pred_room_map_list.append(room_map)
366
+ else:
367
+ pred_room_map_list = masks_list
368
+
369
+ gt2pred_indices = [-1] * len(gt_polys)
370
+ gt2pred_exists = [False] * len(gt_polys)
371
+
372
+ for gt_ind, gt_map in enumerate(gt_room_map_list):
373
+
374
+ best_iou = 0.
375
+ best_ind = -1
376
+ for pred_ind, pred_map in enumerate(pred_room_map_list):
377
+
378
+ intersection = (1 - ignore_mask_region) * ((pred_map + gt_map) == 2)
379
+ union = (1 - ignore_mask_region) * ((pred_map + gt_map) >= 1)
380
+
381
+ iou = np.sum(intersection) / (np.sum(union) + 1)
382
+
383
+ if iou > best_iou and iou > 0.5:
384
+ best_iou = iou
385
+ best_ind = pred_ind
386
+
387
+ # plt.figure()
388
+ # plt.subplot(121)
389
+ # plt.imshow(pred_map)
390
+ # plt.subplot(122)
391
+ # plt.imshow(gt_map)
392
+ # plt.show()
393
+ # if best_ind == -1:
394
+ # plt.figure()
395
+ # plt.imshow(gt_map)
396
+ # plt.show()
397
+
398
+ gt2pred_indices[gt_ind] = best_ind
399
+ gt2pred_exists[gt_ind] = best_ind != -1
400
+
401
+ # if best_ind == -1:
402
+ # plt.figure()
403
+ # plt.imshow(gt_map)
404
+ # plt.show()
405
+
406
+ pred2gt_exists = [True if pred_ind in gt2pred_indices else False for pred_ind, _ in enumerate(pred_polys)]
407
+ pred2gt_indices = [gt2pred_indices.index(pred_ind) if pred_ind in gt2pred_indices else -1 for pred_ind, _ in enumerate(pred_polys)]
408
+
409
+ # print(gt2pred_indices)
410
+ # print(pred2gt_indices)
411
+ # assert False
412
+
413
+ # import pdb; pdb.set_trace()
414
+ room_metric = get_room_metric()
415
+ if len(pred_polys) == 0:
416
+ room_metric_prec = 0
417
+ else:
418
+ room_metric_prec = sum(room_metric) / float(len(pred_polys))
419
+ room_metric_rec = sum(room_metric) / float(len(gt_polys))
420
+
421
+
422
+ corner_metric = get_corner_metric()
423
+ pred_corners_n = sum([poly.shape[0] - 1 for poly in pred_polys])
424
+ gt_corners_n = sum([poly.shape[0] - 1 for poly in gt_polys])
425
+
426
+ if pred_corners_n > 0:
427
+ corner_metric_prec = sum(corner_metric) / float(pred_corners_n)
428
+ else:
429
+ corner_metric_prec = 0
430
+ corner_metric_rec = sum(corner_metric) / float(gt_corners_n)
431
+
432
+
433
+ angles_metric = get_angle_metric()
434
+
435
+ if pred_corners_n > 0:
436
+ angles_metric_prec = sum(angles_metric) / float(pred_corners_n)
437
+ else:
438
+ angles_metric_prec = 0
439
+ angles_metric_rec = sum(angles_metric) / float(gt_corners_n)
440
+
441
+ assert room_metric_prec <= 1
442
+ assert room_metric_rec <= 1
443
+ assert corner_metric_prec <= 1
444
+ assert corner_metric_rec <= 1
445
+ assert angles_metric_prec <= 1
446
+ assert angles_metric_rec <= 1
447
+
448
+ result_dict = {
449
+ 'room_prec': room_metric_prec,
450
+ 'room_rec': room_metric_rec,
451
+ 'corner_prec': corner_metric_prec,
452
+ 'corner_rec': corner_metric_rec,
453
+ 'angles_prec': angles_metric_prec,
454
+ 'angles_rec': angles_metric_rec,
455
+ }
456
+
457
+ return result_dict
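`get_quantitative` returns paired precision/recall values for rooms, corners, and angles; a common scalar summary is the F1 of each pair. A small helper sketch, not part of the evaluator itself:

    def f1_scores(result_dict):
        # combine each precision/recall pair returned by get_quantitative into F1
        out = {}
        for key in ('room', 'corner', 'angles'):
            p, r = result_dict['%s_prec' % key], result_dict['%s_rec' % key]
            out['%s_f1' % key] = 2 * p * r / (p + r) if (p + r) > 0 else 0.0
        return out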