# Source: enwiki_2018_atlas/config.yaml (uploaded by FlexRAG)
# Commit 08f2fe0 (verified): Upload retriever to Hugging Face Hub
---
# Retriever configuration.
#
# The nesting below was reconstructed from a copy whose indentation had been
# flattened to column 0. In that flat form every `*_config:` header was a bare
# empty value (null) and its children collided as duplicate top-level keys
# (max_tokens, max_chars, max_bytes, tokenizer_config, tokenizer_type, lang,
# ...), which YAML forbids and most parsers resolve by silently keeping only
# the last occurrence.
# NOTE(review): the hierarchy is inferred from the `*_config` group headers
# and the repeated keys -- confirm against the consumer's schema.

log_interval: 100000
top_k: 10
batch_size: 4096

query_preprocess_pipeline:
  # Empty list: no processor is listed here.
  # NOTE(review): presumably the sections below only take effect for
  # processors named in this list -- verify against the consumer.
  processor_type: []

  length_filter_config:
    max_tokens: null
    min_tokens: null
    max_chars: null
    min_chars: null
    max_bytes: null
    min_bytes: null
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      # NOTE(review): `lang` is placed under tokenizer_config because it
      # directly follows the tokenizer keys in both tokenizer groups --
      # confirm it does not belong to the parent section instead.
      lang: null

  token_normalize_config:
    lang: en
    penn: true
    norm_quote_commas: true
    norm_numbers: true
    pre_replace_unicode_punct: false
    post_remove_control_chars: false
    perl_parity: false

  truncate_config:
    max_chars: null
    max_bytes: null
    max_tokens: null
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      # NOTE(review): same placement assumption as the `lang` key of the
      # tokenizer_config under length_filter_config.
      lang: null

# NOTE(review): the three keys below are assumed to be top-level retriever
# settings (siblings of top_k / batch_size) rather than members of
# truncate_config -- confirm.
retriever_path: null
indexes_merge_method: linear
used_indexes: null