import _init_paths  # adds the repo root to sys.path so the repo-local `lib` package resolves

import argparse
import copy

import numpy as np
import numpy.ma as ma
import scipy.io as scio
from PIL import Image

import torch
import torchvision.transforms as transforms
from torch.autograd import Variable

from lib.network import PoseNet, PoseRefineNet
from lib.transformations import quaternion_matrix, quaternion_from_matrix

parser = argparse.ArgumentParser()
parser.add_argument('--dataset_root', type=str, default='', help='dataset root dir')
parser.add_argument('--model', type=str, default='', help='resume PoseNet model')
parser.add_argument('--refine_model', type=str, default='', help='resume PoseRefineNet model')
opt = parser.parse_args()
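
# A typical invocation, as a sketch only -- the script path and checkpoint
# names below are placeholders, not fixed by this file; point the flags at the
# DenseFusion checkpoints you actually downloaded or trained:
#
#   python tools/eval_ycb.py \
#       --dataset_root ./datasets/ycb/YCB_Video_Dataset \
#       --model trained_checkpoints/ycb/pose_model.pth \
#       --refine_model trained_checkpoints/ycb/pose_refine_model.pth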

norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Crop sizes are snapped up to the nearest entry in border_list (see get_bbox).
border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
xmap = np.array([[j for i in range(640)] for j in range(480)])  # row index of every pixel
ymap = np.array([[i for i in range(640)] for j in range(480)])  # column index of every pixel

# Camera intrinsics of the YCB-Video keyframes; dividing the raw depth by
# cam_scale converts it to metres.
cam_cx = 312.9869
cam_cy = 241.3109
cam_fx = 1066.778
cam_fy = 1067.487
cam_scale = 10000.0

num_obj = 21        # number of YCB object classes
img_width = 480     # NB: number of image rows
img_length = 640    # NB: number of image columns
num_points = 1000   # scene points fed to PoseNet per object
num_points_mesh = 500
iteration = 2       # refinement passes through PoseRefineNet
bs = 1
dataset_config_dir = 'datasets/ycb/dataset_config'
ycb_toolbox_dir = 'YCB_Video_toolbox'
result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'


def get_bbox(posecnn_rois, idx):
    # Clamp the PoseCNN ROI of detection `idx` to the image, then snap its
    # height and width up to the next size in border_list so that crops come
    # in a fixed set of shapes. (`idx` is passed explicitly here instead of
    # being read from an outer-scope variable.)
    rmin = int(posecnn_rois[idx][3]) + 1
    rmax = int(posecnn_rois[idx][5]) - 1
    cmin = int(posecnn_rois[idx][2]) + 1
    cmax = int(posecnn_rois[idx][4]) - 1
    r_b = rmax - rmin
    for tt in range(len(border_list)):
        if r_b > border_list[tt] and r_b < border_list[tt + 1]:
            r_b = border_list[tt + 1]
            break
    c_b = cmax - cmin
    for tt in range(len(border_list)):
        if c_b > border_list[tt] and c_b < border_list[tt + 1]:
            c_b = border_list[tt + 1]
            break
    center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
    rmin = center[0] - int(r_b / 2)
    rmax = center[0] + int(r_b / 2)
    cmin = center[1] - int(c_b / 2)
    cmax = center[1] + int(c_b / 2)
    # Shift the window back inside the image if it overruns a border.
    if rmin < 0:
        delt = -rmin
        rmin = 0
        rmax += delt
    if cmin < 0:
        delt = -cmin
        cmin = 0
        cmax += delt
    if rmax > img_width:
        delt = rmax - img_width
        rmax = img_width
        rmin -= delt
    if cmax > img_length:
        delt = cmax - img_length
        cmax = img_length
        cmin -= delt
    return rmin, rmax, cmin, cmax
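
# A worked example with illustrative numbers: a detection whose adjusted ROI
# spans rows 101..179 has r_b = 78, which lies in (40, 80) and is snapped up
# to 80; re-centring at row 140 then yields rmin = 100, rmax = 180.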

# Load the trained estimator and its iterative refiner; both run frozen on the GPU.
estimator = PoseNet(num_points=num_points, num_obj=num_obj)
estimator.cuda()
estimator.load_state_dict(torch.load(opt.model))
estimator.eval()

refiner = PoseRefineNet(num_points=num_points, num_obj=num_obj)
refiner.cuda()
refiner.load_state_dict(torch.load(opt.refine_model))
refiner.eval()
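
# The torch.load calls above assume the checkpoints load onto the local CUDA
# device; if yours were saved elsewhere, torch.load(opt.model,
# map_location='cpu') (or 'cuda:0') is the usual way to remap the tensors.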

testlist = []
with open('{0}/test_data_list.txt'.format(dataset_config_dir)) as input_file:
    for input_line in input_file:
        if input_line[-1:] == '\n':
            input_line = input_line[:-1]
        testlist.append(input_line)
print(len(testlist))

# Load the sampled 3D model points (points.xyz) for each class, keyed by the
# 1-based class id that PoseCNN uses in its label maps.
class_id = 1
cld = {}
with open('{0}/classes.txt'.format(dataset_config_dir)) as class_file:
    for class_input in class_file:
        class_input = class_input[:-1]  # drop the trailing newline

        cld[class_id] = []
        with open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) as input_file:
            for input_line in input_file:
                xyz = input_line[:-1].split(' ')
                cld[class_id].append([float(xyz[0]), float(xyz[1]), float(xyz[2])])
        cld[class_id] = np.array(cld[class_id])
        class_id += 1
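
# A terser equivalent of the inner parse, as a sketch -- it assumes every row
# of points.xyz is plain whitespace-separated "x y z", which holds for the
# released YCB-Video model files:
#
#   cld[class_id] = np.loadtxt('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input))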

# Evaluate every test keyframe, using the PoseCNN results shipped with the
# YCB_Video_toolbox for segmentation labels and ROIs.
for now in range(len(testlist)):  # 2,949 keyframes in the YCB-Video test split
    img = Image.open('{0}/{1}-color.png'.format(opt.dataset_root, testlist[now]))
    depth = np.array(Image.open('{0}/{1}-depth.png'.format(opt.dataset_root, testlist[now])))
    posecnn_meta = scio.loadmat('{0}/results_PoseCNN_RSS2018/{1}.mat'.format(ycb_toolbox_dir, '%06d' % now))
    label = np.array(posecnn_meta['labels'])
    posecnn_rois = np.array(posecnn_meta['rois'])

    lst = posecnn_rois[:, 1:2].flatten()  # class id of each detection
    my_result_wo_refine = []
    my_result = []

    for idx in range(len(lst)):
        itemid = int(lst[idx])
        try:
            rmin, rmax, cmin, cmax = get_bbox(posecnn_rois, idx)

            # Keep pixels that have a valid depth reading and belong to this object.
            mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
            mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
            mask = mask_label * mask_depth

            # Sample exactly num_points pixels from the crop: subsample at
            # random when there are too many, wrap-pad when there are too few.
            choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
            if len(choose) > num_points:
                c_mask = np.zeros(len(choose), dtype=int)
                c_mask[:num_points] = 1
                np.random.shuffle(c_mask)
                choose = choose[c_mask.nonzero()]
            else:
                choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

            depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            choose = np.array([choose])

            # Back-project the sampled pixels through the pinhole model:
            # z = d / cam_scale, x = (u - cx) * z / fx, y = (v - cy) * z / fy.
            pt2 = depth_masked / cam_scale
            pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
            pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
            cloud = np.concatenate((pt0, pt1, pt2), axis=1)
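
            # Sanity check with illustrative numbers: a pixel at column u = 420,
            # row v = 240 with raw depth d = 10000 gives z = 1.0 m and
            # x = (420 - 312.9869) * 1.0 / 1066.778 ~= 0.100 m.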

            # Crop the RGB image to the same window and normalise it with the
            # ImageNet statistics the PoseNet backbone expects.
            img_masked = np.array(img)[:, :, :3]
            img_masked = np.transpose(img_masked, (2, 0, 1))
            img_masked = img_masked[:, rmin:rmax, cmin:cmax]

            cloud = torch.from_numpy(cloud.astype(np.float32))
            choose = torch.LongTensor(choose.astype(np.int32))
            img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
            index = torch.LongTensor([itemid - 1])  # 0-based class index

            cloud = Variable(cloud).cuda()
            choose = Variable(choose).cuda()
            img_masked = Variable(img_masked).cuda()
            index = Variable(index).cuda()

            cloud = cloud.view(1, num_points, 3)
            img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])

            # PoseNet predicts a quaternion, a translation offset and a
            # confidence for every sampled point; normalise the quaternions.
            pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
            pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)

            # Keep the prediction of the most confident point; its translation
            # is that point's 3D position plus its predicted offset.
            pred_c = pred_c.view(bs, num_points)
            how_max, which_max = torch.max(pred_c, 1)
            pred_t = pred_t.view(bs * num_points, 1, 3)
            points = cloud.view(bs * num_points, 1, 3)

            my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
            my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
            my_pred = np.append(my_r, my_t)
            my_result_wo_refine.append(my_pred.tolist())

            # Iterative refinement: map the scene cloud into the frame of the
            # current estimate, predict a residual pose with PoseRefineNet,
            # and compose it with the running estimate.
            for ite in range(0, iteration):
                T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t

                # Row-vector form of R^T (p - t): the cloud expressed in the
                # object frame implied by the current pose estimate.
                new_cloud = torch.bmm((cloud - T), R).contiguous()
                pred_r, pred_t = refiner(new_cloud, emb, index)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)
                my_mat_2[0:3, 3] = my_t_2

                # Compose the 4x4 transforms (running pose times residual),
                # then split back into a quaternion and a translation.
                my_mat_final = np.dot(my_mat, my_mat_2)
                my_r_final = copy.deepcopy(my_mat_final)
                my_r_final[0:3, 3] = 0
                my_r_final = quaternion_from_matrix(my_r_final, True)
                my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

                my_pred = np.append(my_r_final, my_t_final)
                my_r = my_r_final
                my_t = my_t_final

            my_result.append(my_pred.tolist())
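
            # Applying the final pose to the model points, as a sketch (not
            # part of this script's output; cld[itemid] holds the model points
            # loaded above, quaternion_matrix comes from lib.transformations):
            #
            #   R_final = quaternion_matrix(my_r)[:3, :3]
            #   model_in_cam = np.dot(cld[itemid], R_final.T) + my_t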

        except (ZeroDivisionError, ValueError):
            # An empty mask makes the wrap-padding of `choose` fail (older
            # NumPy raises ZeroDivisionError, newer NumPy ValueError); record
            # an all-zero pose so the result files stay aligned with the
            # detection list.
            print("PoseCNN Detector Lost {0} at No.{1} keyframe".format(itemid, now))
            my_result_wo_refine.append([0.0 for i in range(7)])
            my_result.append([0.0 for i in range(7)])

    # One .mat file per keyframe, in the layout the YCB_Video_toolbox
    # evaluation scripts expect.
    scio.savemat('{0}/{1}.mat'.format(result_wo_refine_dir, '%04d' % now), {'poses': my_result_wo_refine})
    scio.savemat('{0}/{1}.mat'.format(result_refine_dir, '%04d' % now), {'poses': my_result})
    print("Finish No.{0} keyframe".format(now))