# MotionBERT: lib/data/dataset_action.py
import torch
import numpy as np
import os
import random
import copy
from torch.utils.data import Dataset, DataLoader
from lib.utils.utils_data import crop_scale, resample
from lib.utils.tools import read_pkl


def get_action_names(file_path="data/action/ntu_actions.txt"):
    # Parse the NTU action-label list into a list of action names.
    with open(file_path, "r") as f:
        s = f.read()
    actions = s.split('\n')
    action_names = []
    for a in actions:
        if not a.strip():          # skip blank / trailing lines
            continue
        action_names.append(a.split('.')[1][1:])
    return action_names
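
# Illustration (assumed label-file format; ntu_actions.txt is not part of this
# file): a line such as "A1. drink water" splits on '.' into
# ['A1', ' drink water'], so [1][1:] strips the "A1." prefix and the leading
# space, leaving "drink water".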


def make_cam(x, img_shape):
    '''
    Normalize pixel coordinates by the longer image side.
        Input:  x (M x T x V x C), keypoints in pixel units
                img_shape (height, width)
        Output: coordinates where the longer side is mapped to [-1, 1]
    '''
    h, w = img_shape
    if w >= h:
        x_cam = x / w * 2 - 1
    else:
        x_cam = x / h * 2 - 1
    return x_cam
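
# Worked example (hypothetical numbers): for a 1920x1080 image, w >= h, so a
# keypoint at pixel (960, 540) becomes (960/1920*2-1, 540/1920*2-1)
# = (0.0, -0.4375); only the longer side spans the full [-1, 1] range.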


def coco2h36m(x):
    '''
        Input: x (M x T x V x C)

        COCO: {0-nose 1-Leye 2-Reye 3-Lear 4-Rear 5-Lsho 6-Rsho 7-Lelb 8-Relb 9-Lwri 10-Rwri 11-Lhip 12-Rhip 13-Lkne 14-Rkne 15-Lank 16-Rank}

        H36M:
        0: 'root',
        1: 'rhip',
        2: 'rkne',
        3: 'rank',
        4: 'lhip',
        5: 'lkne',
        6: 'lank',
        7: 'belly',
        8: 'neck',
        9: 'nose',
        10: 'head',
        11: 'lsho',
        12: 'lelb',
        13: 'lwri',
        14: 'rsho',
        15: 'relb',
        16: 'rwri'
    '''
    y = np.zeros(x.shape)
    y[:,:,0,:] = (x[:,:,11,:] + x[:,:,12,:]) * 0.5
    y[:,:,1,:] = x[:,:,12,:]
    y[:,:,2,:] = x[:,:,14,:]
    y[:,:,3,:] = x[:,:,16,:]
    y[:,:,4,:] = x[:,:,11,:]
    y[:,:,5,:] = x[:,:,13,:]
    y[:,:,6,:] = x[:,:,15,:]
    y[:,:,8,:] = (x[:,:,5,:] + x[:,:,6,:]) * 0.5
    y[:,:,7,:] = (y[:,:,0,:] + y[:,:,8,:]) * 0.5
    y[:,:,9,:] = x[:,:,0,:]
    y[:,:,10,:] = (x[:,:,1,:] + x[:,:,2,:]) * 0.5
    y[:,:,11,:] = x[:,:,5,:]
    y[:,:,12,:] = x[:,:,7,:]
    y[:,:,13,:] = x[:,:,9,:]
    y[:,:,14,:] = x[:,:,6,:]
    y[:,:,15,:] = x[:,:,8,:]
    y[:,:,16,:] = x[:,:,10,:]
    return y
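
# Note: H3.6M joints 0 (root), 7 (belly), 8 (neck) and 10 (head) have no direct
# COCO counterpart and are synthesized above as midpoints (hips, root/neck,
# shoulders and eyes respectively). The array shape is preserved, e.g.
# (2, 243, 17, 2) in -> (2, 243, 17, 2) out.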


def random_move(data_numpy,
                angle_range=[-10., 10.],
                scale_range=[0.9, 1.1],
                transform_range=[-0.1, 0.1],
                move_time_candidate=[1]):
    # Data augmentation: apply a smoothly interpolated random rotation,
    # scaling and translation to the 2D joint coordinates of every frame.
    data_numpy = np.transpose(data_numpy, (3,1,2,0)) # M,T,V,C -> C,T,V,M
    C, T, V, M = data_numpy.shape
    move_time = random.choice(move_time_candidate)
    node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
    node = np.append(node, T)
    num_node = len(node)
    A = np.random.uniform(angle_range[0], angle_range[1], num_node)
    S = np.random.uniform(scale_range[0], scale_range[1], num_node)
    T_x = np.random.uniform(transform_range[0], transform_range[1], num_node)
    T_y = np.random.uniform(transform_range[0], transform_range[1], num_node)
    a = np.zeros(T)
    s = np.zeros(T)
    t_x = np.zeros(T)
    t_y = np.zeros(T)
    # linearly interpolate the random parameters between key frames
    for i in range(num_node - 1):
        a[node[i]:node[i + 1]] = np.linspace(
            A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180
        s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1], node[i + 1] - node[i])
        t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1], node[i + 1] - node[i])
        t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1], node[i + 1] - node[i])
    theta = np.array([[np.cos(a) * s, -np.sin(a) * s],
                      [np.sin(a) * s, np.cos(a) * s]])
    # perform transformation
    for i_frame in range(T):
        xy = data_numpy[0:2, i_frame, :, :]
        new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
        new_xy[0] += t_x[i_frame]
        new_xy[1] += t_y[i_frame]
        data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)
    data_numpy = np.transpose(data_numpy, (3,1,2,0)) # C,T,V,M -> M,T,V,C
    return data_numpy
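
# Only channels 0:2 (x, y) are rotated/scaled/translated; when the input is the
# (x, y, confidence) tensor built in ActionDataset below, the confidence
# channel passes through unchanged because the slice data_numpy[0:2] excludes it.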


def human_tracking(x):
    # Resolve identity swaps between the two tracked persons across frames.
    M, T = x.shape[:2]
    if M==1:
        return x
    else:
        diff0 = np.sum(np.linalg.norm(x[0,1:] - x[0,:-1], axis=-1), axis=-1)  # (T-1, V, C) -> (T-1)
        diff1 = np.sum(np.linalg.norm(x[0,1:] - x[1,:-1], axis=-1), axis=-1)
        x_new = np.zeros(x.shape)
        sel = np.cumsum(diff0 > diff1) % 2
        sel = sel[:,None,None]
        x_new[0][0] = x[0][0]
        x_new[1][0] = x[1][0]
        x_new[0,1:] = x[1,1:] * sel + x[0,1:] * (1-sel)
        x_new[1,1:] = x[0,1:] * sel + x[1,1:] * (1-sel)
        return x_new
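
# Heuristic behind human_tracking: diff0 is how far track 0 moved from its own
# previous frame, diff1 is its distance to track 1's previous frame. Whenever
# diff0 > diff1 an identity swap is assumed at that frame, and the cumulative
# parity `sel` flips which raw track feeds each output slot from then on.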


class ActionDataset(Dataset):
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=True):   # data_split: train/test etc.
        np.random.seed(0)
        dataset = read_pkl(data_path)
        if check_split:
            assert data_split in dataset['split'].keys()
            self.split = dataset['split'][data_split]
        annotations = dataset['annotations']
        self.random_move = random_move
        self.is_train = "train" in data_split or (not check_split)
        if "oneshot" in data_split:
            self.is_train = False
        self.scale_range = scale_range
        motions = []
        labels = []
        for sample in annotations:
            if check_split and (not sample['frame_dir'] in self.split):
                continue
            resample_id = resample(ori_len=sample['total_frames'], target_len=n_frames, randomness=self.is_train)
            motion_cam = make_cam(x=sample['keypoint'], img_shape=sample['img_shape'])
            motion_cam = human_tracking(motion_cam)
            motion_cam = coco2h36m(motion_cam)
            motion_conf = sample['keypoint_score'][..., None]
            motion = np.concatenate((motion_cam[:,resample_id], motion_conf[:,resample_id]), axis=-1)
            if motion.shape[0] == 1:                        # single person: pad with an all-zero second person
                fake = np.zeros(motion.shape)
                motion = np.concatenate((motion, fake), axis=0)
            motions.append(motion.astype(np.float32))
            labels.append(sample['label'])
        self.motions = np.array(motions)
        self.labels = np.array(labels)

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.motions)

    def __getitem__(self, index):
        raise NotImplementedError
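
# Expected pickle layout (inferred from the field accesses above):
# dataset['split'][split_name] is a collection of 'frame_dir' identifiers, and
# each entry of dataset['annotations'] carries 'frame_dir', 'total_frames',
# 'keypoint' (M x T x 17 COCO keypoints in pixels), 'keypoint_score',
# 'img_shape' as (height, width), and an integer 'label'.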


class NTURGBD(ActionDataset):
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1]):
        super(NTURGBD, self).__init__(data_path, data_split, n_frames, random_move, scale_range)

    def __getitem__(self, idx):
        'Generates one sample of data'
        motion, label = self.motions[idx], self.labels[idx]   # (M,T,J,C)
        if self.random_move:
            motion = random_move(motion)
        if self.scale_range:
            result = crop_scale(motion, scale_range=self.scale_range)
        else:
            result = motion
        return result.astype(np.float32), label


class NTURGBD1Shot(ActionDataset):
    def __init__(self, data_path, data_split, n_frames=243, random_move=True, scale_range=[1,1], check_split=False):
        super(NTURGBD1Shot, self).__init__(data_path, data_split, n_frames, random_move, scale_range, check_split)
        # NTU-120 one-shot protocol: drop the 20 held-out one-shot classes and
        # remap the remaining 100 labels to a contiguous range.
        oneshot_classes = [0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114]
        new_classes = set(range(120)) - set(oneshot_classes)
        old2new = {}
        for i, cid in enumerate(new_classes):
            old2new[cid] = i
        filtered = [x not in oneshot_classes for x in self.labels]
        self.motions = self.motions[filtered]
        filtered_labels = self.labels[filtered]
        self.labels = [old2new[x] for x in filtered_labels]

    def __getitem__(self, idx):
        'Generates one sample of data'
        motion, label = self.motions[idx], self.labels[idx]   # (M,T,J,C)
        if self.random_move:
            motion = random_move(motion)
        if self.scale_range:
            result = crop_scale(motion, scale_range=self.scale_range)
        else:
            result = motion
        return result.astype(np.float32), label
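

# ---------------------------------------------------------------------------
# Minimal smoke test (a sketch, not part of the original training pipeline).
# It avoids the NTU pickle entirely and pushes random arrays through the
# preprocessing helpers so shapes and value ranges can be checked offline.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    M, T, V, C = 2, 243, 17, 2                                     # persons, frames, COCO joints, (x, y)
    dummy = np.random.rand(M, T, V, C) * np.array([1920., 1080.])  # fake pixel coordinates
    cam = make_cam(dummy, img_shape=(1080, 1920))                  # normalize to ~[-1, 1]
    tracked = human_tracking(cam)
    h36m = coco2h36m(tracked)
    conf = np.ones((M, T, V, 1), dtype=np.float32)
    motion = np.concatenate((h36m, conf), axis=-1)                 # (M, T, 17, 3), as built in ActionDataset
    moved = random_move(motion.astype(np.float32))
    print("make_cam range:", cam.min(), cam.max())
    print("h36m shape:", h36m.shape, "after random_move:", moved.shape)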