File size: 1,012 Bytes
3a1da90 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# Launches train.py under torchrun with one worker process per visible GPU.
# All settings below are forwarded to train.py as key=value overrides
# (presumably Hydra overrides, given --config-name and the ++ prefix -- confirm).

# GPUs used by this run. Exported so torchrun and its workers inherit it.
export CUDA_VISIBLE_DEVICES=4,5,6,7

# Number of comma-separated device ids above; awk prints the field count
# with no padding, so the value is safe to embed in exp_id.
NUM_GPUS=$(printf '%s\n' "${CUDA_VISIBLE_DEVICES}" | awk -F',' '{ print NF }')

btz=72                  # forwarded as batch_size
num_iterations=200_000  # forwarded verbatim, underscore included

# Experiment identifier. The seed/flowratio parts are labels only: this script
# passes no matching overrides, so those values must come from elsewhere
# (presumably train_config.yaml -- verify).
exp_id="AC_${btz}_numgpus${NUM_GPUS}_niter${num_iterations}_T5_CLAP_meanflow_improved_changecfg_seed1415926_flowratio0.75"

text_encoder_name=t5_clap
weights=./weights/fluxaudio_fm.pth  # pre-trained weights to be loaded for mix-field finetuning
# NOTE(review): the original note here read "1024 + 512" while the value is
# 512 -- confirm which text-conditioning dimension is intended.
text_c_dim=512
model=meanaudio_mf  # options noted by the author: meanaudio_mf, fluxaudio_fm

# CUDA_VISIBLE_DEVICES is already exported above, so it is not repeated as a
# per-command assignment. Expansions are quoted so no value can be word-split
# or glob-expanded before it reaches train.py.
OMP_NUM_THREADS=1 \
torchrun --standalone --nproc_per_node="${NUM_GPUS}" \
  train.py \
  --config-name train_config.yaml \
  exp_id="${exp_id}" \
  compile=False \
  model="${model}" \
  batch_size="${btz}" \
  eval_batch_size=32 \
  num_iterations="${num_iterations}" \
  text_encoder_name="${text_encoder_name}" \
  data_dim.text_c_dim="${text_c_dim}" \
  pin_memory=False \
  num_workers=10 \
  ac_oversample_rate=5 \
  weights="${weights}" \
  ++use_rope=True \
  ++use_wandb=True \
  ++debug=False