# Source: CallyticsDemo/config/nemo/diar_infer_telephonic.yaml
# Commit: "Initial" (7d90704) — bunyaminergen
# Run-wide settings that sit beside the `diarizer` section.
name: 'ClusterDiarizer'

num_workers: 1
sample_rate: 16000  # presumably Hz — confirm against the consumer
batch_size: 64
device: cpu  # pinned to CPU
verbose: true
# Diarization pipeline settings. Every stage below is a child of `diarizer`.
# The nesting is significant: `model_path` and `parameters` recur once per
# stage and would collide as duplicate keys if flattened to a single level.
diarizer:
  # Input manifest and output directory (relative paths under .temp).
  manifest_filepath: .temp/manifest.json
  out_dir: .temp
  oracle_vad: false
  collar: 0.25
  ignore_overlap: true

  # Voice-activity-detection stage.
  vad:
    model_path: vad_multilingual_marblenet
    external_vad_manifest: null  # no external VAD manifest supplied
    parameters:
      window_length_in_sec: 0.15
      shift_length_in_sec: 0.01
      smoothing: 'median'
      overlap: 0.5
      onset: 0.1
      offset: 0.1
      pad_onset: 0.1
      pad_offset: 0
      min_duration_on: 0
      min_duration_off: 0.2
      filter_speech_first: true

  # Speaker-embedding stage.
  speaker_embeddings:
    model_path: titanet_large
    parameters:
      # Five scales: each shift is half of the window at the same position,
      # and `multiscale_weights` carries one (equal) weight per scale.
      window_length_in_sec: [1.5, 1.25, 1.0, 0.75, 0.5]
      shift_length_in_sec: [0.75, 0.625, 0.5, 0.375, 0.25]
      multiscale_weights: [1, 1, 1, 1, 1]
      save_embeddings: true

  # Clustering stage (no model_path; parameters only).
  clustering:
    parameters:
      oracle_num_speakers: false
      max_num_speakers: 8
      enhanced_count_thres: 80
      max_rp_threshold: 0.25
      sparse_search_volume: 30
      maj_vote_spk_count: false
      chunk_cluster_count: 50
      embeddings_per_chunk: 10000

  # MSDD model stage.
  msdd_model:
    model_path: diar_msdd_telephonic
    parameters:
      use_speaker_model_from_ckpt: true
      infer_batch_size: 25
      sigmoid_threshold: [0.7]  # single-element list, not a bare float
      seq_eval_mode: false
      split_infer: true
      diar_window_length: 50
      overlap_infer_spk_limit: 5

  # ASR stage.
  # NOTE(review): `ctc_decoder_parameters` and `realigning_lm_parameters` are
  # placed under `asr`, beside `parameters`, following the upstream NeMo
  # layout — confirm against the consumer's schema.
  asr:
    model_path: stt_en_conformer_ctc_large
    parameters:
      asr_based_vad: false
      asr_based_vad_threshold: 1.0
      asr_batch_size: null
      decoder_delay_in_sec: null
      word_ts_anchor_offset: null
      word_ts_anchor_pos: 'start'
      fix_word_ts_with_VAD: false
      colored_text: false
      print_time: true
      break_lines: false

    ctc_decoder_parameters:
      pretrained_language_model: null  # no language model configured
      beam_width: 32
      alpha: 0.5
      beta: 2.5

    realigning_lm_parameters:
      arpa_language_model: null  # no ARPA language model configured
      min_number_of_words: 3
      max_number_of_words: 10
      logprob_diff_threshold: 1.2