Spaces:
Sleeping
Sleeping
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Copyright 2020 Imperial College London (Pingchuan Ma) | |
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) | |
"""Transforms mp4 audio to npz. Code has strong assumptions on the dataset organization!""" | |
import os | |
import librosa # μμ λ°μ΄ν° λΆμ λΌμ΄λΈλ¬λ¦¬ | |
import argparse # λͺ λ Ήν μΈμλ₯Ό νμ±ν΄μ£Όλ λͺ¨λ | |
from utils import * # utils.py λͺ¨λμ μλ λͺ¨λ ν¨μ(read_txt_lines(), save2npz(), read_video()) λΆλ¬μ€κΈ° | |
# Function that receives and parses the command-line arguments
def load_args(default_config=None, argv=None):
    """Build the command-line parser for this script and parse the arguments.

    Args:
        default_config: Accepted for backward compatibility; it is not used
            by this function.
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``argparse`` falls back to ``sys.argv[1:]``, which
            is what the script relies on.

    Returns:
        argparse.Namespace: with the attributes ``video_direc``,
        ``filename_path``, ``save_direc`` and ``testset_only``.
    """
    # Parser instance that collects the supported options.
    parser = argparse.ArgumentParser(description='Extract Audio Waveforms')
    # Register the accepted arguments.
    # -- utils
    parser.add_argument('--video-direc', default=None, help='raw video directory')
    parser.add_argument('--filename-path', default='./vietnamese_detected_face_30.csv', help='list of detected video and its subject ID')
    parser.add_argument('--save-direc', default=None, help='the directory of saving audio waveforms (.npz)')
    # -- test set only
    parser.add_argument('--testset-only', default=False, action='store_true', help='process testing set only')
    # Parsed values are returned as an argparse.Namespace.
    return parser.parse_args(argv)
# Sample rate passed to librosa.load; librosa resamples to this rate on load
# unless sr=None is given.
TARGET_SAMPLE_RATE = 16000
# Only the final NUM_TAIL_SAMPLES samples of each waveform are kept.
NUM_TAIL_SAMPLES = 19456

# Parse and load the command-line arguments.
args = load_args()

# Both directories default to None in the parser; stop with a clear message
# here rather than with a TypeError from os.path.join inside the loop.
if args.video_direc is None or args.save_direc is None:
    raise SystemExit('Both --video-direc and --save-direc must be provided.')

# Read the listing and split it into one entry per line. The context manager
# guarantees the file handle is closed.
with open(args.filename_path) as list_file:
    lines = list_file.read().splitlines()

# Blank lines carry no entry and would break the two-field unpacking below.
lines = [entry for entry in lines if entry.strip()]

if args.testset_only:
    # Keep only entries whose second-to-last '/'-separated component is
    # 'test'. Entries with fewer than two components cannot match and are
    # dropped instead of raising IndexError.
    lines = [
        entry for entry in lines
        if entry.count('/') >= 1 and entry.split('/')[-2] == 'test'
    ]

# Process every listed entry in order.
for filename_idx, line in enumerate(lines):
    # Each entry holds two comma-separated fields: file name and person id.
    filename, person_id = line.split(',')
    # Report the entry index and the file name being processed.
    print('idx: {} \tProcessing.\t{}'.format(filename_idx, filename))
    # Source video: <video directory>/<file name>.mp4
    video_pathname = os.path.join(args.video_direc, filename + '.mp4')
    # Destination archive: <save directory>/<file name>.npz
    dst_pathname = os.path.join(args.save_direc, filename + '.npz')

    # Explicit check instead of `assert`, which is removed when Python runs
    # with -O and would then let a missing file slip through.
    if not os.path.isfile(video_pathname):
        raise FileNotFoundError(
            "File does not exist. Path input: {}".format(video_pathname))

    # librosa.load returns (waveform, sample_rate); only the waveform is used.
    # Original author's note: librosa normalises the data range to [-1, 1].
    # Slicing from the end keeps the last NUM_TAIL_SAMPLES samples; a shorter
    # waveform is kept whole.
    data = librosa.load(video_pathname, sr=TARGET_SAMPLE_RATE)[0][-NUM_TAIL_SAMPLES:]
    # Store the waveform in npz format via the project's save2npz helper.
    save2npz(dst_pathname, data=data)