File size: 2,834 Bytes
c42fe7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash

# ========== CONFIGURATION ==========

INPUT_DS=$1
OUTPUT_DIR=$2
TITLE=$(basename "$INPUT_DS" .ds)

# Inference configuration
VARIANCE_EXP="regular_variance_v1"
ACOUSTIC_EXP="debug_test"
ACOUSTIC_CKPT=""       # Set this to e.g., 50000 to specify a checkpoint
DIFFUSION_STEPS=100    # Set to desired number of diffusion steps
SEED=42
NUM_RUNS=1
MEL_ONLY=false         # Set to true to save only mel spectrograms, not waveform

# ========== CHECK INPUTS ==========

if [ -z "$INPUT_DS" ] || [ -z "$OUTPUT_DIR" ]; then
  echo "Usage: ./inference.sh path/to/input.ds path/to/output_dir"
  exit 1
fi

mkdir -p "$OUTPUT_DIR"

# ========== LOGGING SETUP ==========

LOG="$OUTPUT_DIR/${TITLE}_inference.log"
exec > >(tee -a "$LOG") 2>&1

echo ">>> Starting inference for: $INPUT_DS"
echo ">>> Logging to: $LOG"

# ========== STEP 0: VOCODER CHECK (dynamic) ==========

if [ "$MEL_ONLY" = false ]; then
  # Find the highest-step vocoder checkpoint in the acoustic experiment folder
  EXTRACT_ROOT="/tmp/cantussvs_v1"
  VOCODER_PATH=$(find "$EXTRACT_ROOT/checkpoints/$ACOUSTIC_EXP" -maxdepth 1 -type f -name 'model_ckpt_steps_*.ckpt' | sort -V | tail -n 1)

  if [ -z "$VOCODER_PATH" ]; then
    echo "❌ No vocoder checkpoint found in $EXTRACT_ROOT/checkpoints/$ACOUSTIC_EXP/"
    echo "   You can disable waveform synthesis by setting MEL_ONLY=true in the script."
    exit 1
  else
    echo ">>> Using vocoder checkpoint: $VOCODER_PATH"
  fi
fi

# ========== STEP 1: VARIANCE INFERENCE ==========

echo ">>> Step 1: Running variance model inference..."
python scripts/infer.py variance "$INPUT_DS" \
  --exp "$VARIANCE_EXP" \
  --predict dur --predict pitch \
  --out "$OUTPUT_DIR" \
  --title "$TITLE" \
  --seed "$SEED"

VARIANCE_EXIT_CODE=$?
COMPLETED_DS="$OUTPUT_DIR/${TITLE}.ds"

if [ $VARIANCE_EXIT_CODE -ne 0 ] || [ ! -f "$COMPLETED_DS" ]; then
  echo "❌ Variance inference failed or output DS file was not created."
  exit 1
fi

# ========== STEP 2: ACOUSTIC INFERENCE ==========

echo ">>> Step 2: Running acoustic model inference..."

ACOUSTIC_ARGS=(
  python scripts/infer.py acoustic "$COMPLETED_DS"
  --exp "$ACOUSTIC_EXP"
  --out "$OUTPUT_DIR"
  --title "$TITLE"
  --seed "$SEED"
  --num "$NUM_RUNS"
  --steps "$DIFFUSION_STEPS"
)

if [ -n "$ACOUSTIC_CKPT" ]; then
  ACOUSTIC_ARGS+=(--ckpt "$ACOUSTIC_CKPT")
fi

if [ "$MEL_ONLY" = true ]; then
  ACOUSTIC_ARGS+=(--mel)
fi

"${ACOUSTIC_ARGS[@]}"
ACOUSTIC_EXIT_CODE=$?

# Check output WAV file (only if not mel-only)
if [ "$MEL_ONLY" = false ]; then
  OUTPUT_WAV="$OUTPUT_DIR/${TITLE}.wav"
  if [ $ACOUSTIC_EXIT_CODE -ne 0 ] || [ ! -f "$OUTPUT_WAV" ]; then
    echo "❌ Acoustic inference failed or output WAV file not created."
    exit 1
  fi
  echo "βœ… Done! Output saved to: $OUTPUT_WAV"
else
  echo "βœ… Done! Mel-spectrogram(s) saved to: $OUTPUT_DIR"
fi