Spaces:
Configuration error
Configuration error
File size: 7,924 Bytes
dbf90d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
import torch
import numpy as np
import os
import random
import copy
from torch.utils.data import Dataset, DataLoader
from lib.utils.utils_data import crop_scale, resample
from lib.utils.tools import read_pkl
def get_action_names(file_path = "data/action/ntu_actions.txt"):
    '''
    Read the list of action names from a text file.

    Every non-blank line is split on '.'; the piece after the first '.',
    with its first character dropped, is taken as the action name
    (e.g. "A1. drink water" -> "drink water").

    Input: file_path, path of the text file (one action per line)
    Output: list of action-name strings, in file order

    Blank lines (such as the empty string left by a trailing newline) are
    skipped. A non-blank line without a '.' still raises IndexError.
    '''
    # The context manager closes the file even if reading fails.
    with open(file_path, "r") as f:
        lines = f.read().split('\n')
    return [line.split('.')[1][1:] for line in lines if line.strip()]
def make_cam(x, img_shape):
    '''
    Rescale coordinates by the longer image side: x / max(h, w) * 2 - 1.

    Input: x (M x T x V x C)
        img_shape (height, width)
    Output: array shaped like x; a value of 0 maps to -1 and a value equal
        to the longer image side maps to 1.
    '''
    height, width = img_shape
    # Both axes are divided by the same (longer) side.
    longer_side = width if width >= height else height
    return x / longer_side * 2 - 1
def coco2h36m(x):
    '''
    Re-order COCO joints into the H36M joint layout.

    Input: x (M x T x V x C)
    Output: new float array with the same shape as x, joints in H36M order.

    COCO: {0-nose 1-Leye 2-Reye 3-Lear 4Rear 5-Lsho 6-Rsho 7-Lelb 8-Relb 9-Lwri 10-Rwri 11-Lhip 12-Rhip 13-Lkne 14-Rkne 15-Lank 16-Rank}
    H36M:
        0: 'root',  1: 'rhip',  2: 'rkne',  3: 'rank',  4: 'lhip',  5: 'lkne',
        6: 'lank',  7: 'belly', 8: 'neck',  9: 'nose', 10: 'head', 11: 'lsho',
        12: 'lelb', 13: 'lwri', 14: 'rsho', 15: 'relb', 16: 'rwri'

    Joints with no COCO counterpart are synthesized as midpoints:
    root = mid(hips), neck = mid(shoulders), head = mid(eyes),
    belly = mid(root, neck).
    '''
    # (H36M index, COCO index) for joints that are copied as-is.
    copied = (
        (1, 12), (2, 14), (3, 16),      # right leg
        (4, 11), (5, 13), (6, 15),      # left leg
        (9, 0),                         # nose
        (11, 5), (12, 7), (13, 9),      # left arm
        (14, 6), (15, 8), (16, 10),     # right arm
    )
    # (H36M index, COCO index a, COCO index b) for midpoint joints.
    averaged = (
        (0, 11, 12),    # root: between the hips
        (8, 5, 6),      # neck: between the shoulders
        (10, 1, 2),     # head: between the eyes
    )
    out = np.zeros(x.shape)
    for h36m_idx, coco_idx in copied:
        out[:, :, h36m_idx, :] = x[:, :, coco_idx, :]
    for h36m_idx, coco_a, coco_b in averaged:
        out[:, :, h36m_idx, :] = (x[:, :, coco_a, :] + x[:, :, coco_b, :]) * 0.5
    # Belly needs root and neck, so it is filled in last.
    out[:, :, 7, :] = (out[:, :, 0, :] + out[:, :, 8, :]) * 0.5
    return out
def random_move(data_numpy,
                angle_range=[-10., 10.],
                scale_range=[0.9, 1.1],
                transform_range=[-0.1, 0.1],
                move_time_candidate=[1]):
    '''
    Apply a random, smoothly varying 2D rotation / scale / translation.

    Input: data_numpy (M x T x V x C); channels 0 and 1 are transformed,
            any further channels are passed through unchanged
        angle_range (min, max) rotation in degrees
        scale_range (min, max) scale factor
        transform_range (min, max) translation added to channels 0 and 1
        move_time_candidate, choices for the number of time segments; the
            parameters are drawn at segment boundaries and linearly
            interpolated across the frames of each segment
    Output: new array (M x T x V x C) with the same dtype as the input.

    The input array is NOT modified. np.transpose returns a view, so
    writing through it would otherwise change the caller's data; when the
    caller passes a slice of a stored dataset, the augmentation would pile
    up in the stored samples on every call.
    '''
    # Work on a private copy so the in-place writes below never reach the caller.
    data_numpy = np.transpose(np.array(data_numpy), (3,1,2,0)) # M,T,V,C-> C,T,V,M
    C, T, V, M = data_numpy.shape
    move_time = random.choice(move_time_candidate)
    # Segment boundaries along the time axis, with T appended as the end marker.
    node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
    node = np.append(node, T)
    num_node = len(node)
    # One random parameter set per boundary.
    A = np.random.uniform(angle_range[0], angle_range[1], num_node)
    S = np.random.uniform(scale_range[0], scale_range[1], num_node)
    T_x = np.random.uniform(transform_range[0], transform_range[1], num_node)
    T_y = np.random.uniform(transform_range[0], transform_range[1], num_node)
    a = np.zeros(T)
    s = np.zeros(T)
    t_x = np.zeros(T)
    t_y = np.zeros(T)
    # Per-frame parameters: linear interpolation between consecutive boundaries.
    for i in range(num_node - 1):
        start, stop = node[i], node[i + 1]
        span = stop - start
        a[start:stop] = np.linspace(A[i], A[i + 1], span) * np.pi / 180  # degrees -> radians
        s[start:stop] = np.linspace(S[i], S[i + 1], span)
        t_x[start:stop] = np.linspace(T_x[i], T_x[i + 1], span)
        t_y[start:stop] = np.linspace(T_y[i], T_y[i + 1], span)
    # Scaled rotation matrices stacked along the last axis: shape (2, 2, T).
    theta = np.array([[np.cos(a) * s, -np.sin(a) * s],
                      [np.sin(a) * s, np.cos(a) * s]])
    # perform transformation
    for i_frame in range(T):
        xy = data_numpy[0:2, i_frame, :, :]
        new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
        new_xy[0] += t_x[i_frame]
        new_xy[1] += t_y[i_frame]
        data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)
    data_numpy = np.transpose(data_numpy, (3,1,2,0)) # C,T,V,M -> M,T,V,C
    return data_numpy
def human_tracking(x):
    '''
    Undo identity swaps between the first two persons of a sequence.

    Input: x (M x T x V x C)
    Output: x itself when M == 1; otherwise a new float array shaped like x.

    For every frame after the first, person 0's pose is compared with the
    previous-frame pose of person 0 and of person 1. Each time the pose is
    closer to person 1's previous pose, the swap state is toggled, and
    frames in the swapped state have persons 0 and 1 exchanged. Only
    persons 0 and 1 are written; any further persons stay zero in the output.
    '''
    n_person, _n_frame = x.shape[:2]
    if n_person == 1:
        return x
    first, second = x[0], x[1]
    # Per-frame motion cost, summed over joints: (T-1, V, C) -> (T-1)
    cost_stay = np.linalg.norm(first[1:] - first[:-1], axis=-1).sum(axis=-1)
    cost_swap = np.linalg.norm(first[1:] - second[:-1], axis=-1).sum(axis=-1)
    # Parity of the number of detected switches so far: 1 means swapped.
    swapped = (np.cumsum(cost_stay > cost_swap) % 2)[:, None, None]
    kept = 1 - swapped
    tracked = np.zeros(x.shape)
    # The first frame is taken as the reference identity assignment.
    tracked[0, 0] = first[0]
    tracked[1, 0] = second[0]
    tracked[0, 1:] = second[1:] * swapped + first[1:] * kept
    tracked[1, 1:] = first[1:] * swapped + second[1:] * kept
    return tracked
class ActionDataset(Dataset):
    '''
    Base dataset that loads skeleton sequences and labels into memory.

    The pickle at data_path is read with read_pkl and must provide
    'annotations' (a list of samples) and, when check_split is True,
    'split' (a mapping from split name to the 'frame_dir' ids it contains).
    Each kept sample is normalized with make_cam, passed through
    human_tracking and coco2h36m, resampled to n_frames, and stored with
    its keypoint score appended as the last channel.

    Subclasses must implement __getitem__.
    '''
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=True): # data_split: train/test etc.
        '''
        Input: data_path, path of the pickle file
            data_split, name of the split (train/test etc.)
            n_frames, number of frames every sequence is resampled to
            random_move, flag stored for subclasses to enable augmentation
            scale_range, stored for subclasses (passed to crop_scale)
            check_split, when True keep only samples listed in the split;
                when False keep every annotation
        '''
        # Seeds NumPy's global RNG so the construction-time resampling is repeatable.
        np.random.seed(0)
        dataset = read_pkl(data_path)
        split_lookup = None
        if check_split:
            assert data_split in dataset['split'].keys()
            self.split = dataset['split'][data_split]
            # Build a set once: testing membership against a long list for
            # every sample would make loading quadratic in dataset size.
            split_lookup = set(self.split)
        annotations = dataset['annotations']
        self.random_move = random_move
        # Random resampling is used for train splits and whenever the split
        # is not checked; one-shot splits always use the deterministic mode.
        self.is_train = "train" in data_split or (check_split==False)
        if "oneshot" in data_split:
            self.is_train = False
        self.scale_range = scale_range
        motions = []
        labels = []
        for sample in annotations:
            if check_split and sample['frame_dir'] not in split_lookup:
                continue
            resample_id = resample(ori_len=sample['total_frames'], target_len=n_frames, randomness=self.is_train)
            motion_cam = make_cam(x=sample['keypoint'], img_shape=sample['img_shape'])
            motion_cam = human_tracking(motion_cam)
            motion_cam = coco2h36m(motion_cam)
            # Add a trailing axis so the score can be concatenated as a channel.
            motion_conf = sample['keypoint_score'][..., None]
            motion = np.concatenate((motion_cam[:,resample_id], motion_conf[:,resample_id]), axis=-1)
            if motion.shape[0]==1: # Single person, make a fake zero person
                fake = np.zeros(motion.shape)
                motion = np.concatenate((motion, fake), axis=0)
            motions.append(motion.astype(np.float32))
            labels.append(sample['label'])
        self.motions = np.array(motions)
        self.labels = np.array(labels)

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.motions)

    def __getitem__(self, index):
        'Must be provided by subclasses.'
        raise NotImplementedError
class NTURGBD(ActionDataset):
    '''
    Action dataset that serves (motion, label) pairs with optional
    random_move augmentation followed by crop_scale.
    '''
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1]):
        'Forwards every argument to ActionDataset; the split is always checked.'
        super().__init__(data_path, data_split, n_frames, random_move, scale_range)

    def __getitem__(self, idx):
        'Generates one sample of data'
        sample = self.motions[idx] # (M,T,J,C)
        target = self.labels[idx]
        if self.random_move:
            sample = random_move(sample)
        # A falsy scale_range (None or empty) skips crop_scale.
        if not self.scale_range:
            return sample.astype(np.float32), target
        scaled = crop_scale(sample, scale_range=self.scale_range)
        return scaled.astype(np.float32), target
class NTURGBD1Shot(ActionDataset):
    '''
    Action dataset with the 20 one-shot classes removed.

    Samples whose label is one of the one-shot classes are dropped, and
    the remaining 100 of the 120 class ids are renumbered to 0..99 in
    ascending order of their original id.
    '''
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=False):
        'Loads the data through ActionDataset, then filters and relabels it.'
        super(NTURGBD1Shot, self).__init__(data_path, data_split, n_frames, random_move, scale_range, check_split)
        oneshot_classes = [0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114]
        oneshot_set = set(oneshot_classes)
        # Enumerate in sorted order: set iteration order is unspecified, and
        # the old->new mapping must be the same on every run and interpreter.
        new_classes = sorted(set(range(120)) - oneshot_set)
        old2new = {cid: i for i, cid in enumerate(new_classes)}
        # Boolean mask of the samples to keep.
        filtered = np.array([x not in oneshot_set for x in self.labels], dtype=bool)
        self.motions = self.motions[filtered]
        filtered_labels = self.labels[filtered]
        self.labels = [old2new[x] for x in filtered_labels]

    def __getitem__(self, idx):
        'Generates one sample of data'
        motion, label = self.motions[idx], self.labels[idx] # (M,T,J,C)
        if self.random_move:
            motion = random_move(motion)
        # A falsy scale_range (None or empty) skips crop_scale.
        if self.scale_range:
            result = crop_scale(motion, scale_range=self.scale_range)
        else:
            result = motion
        return result.astype(np.float32), label