# FlexRAG retriever configuration.
# Last change: "Update FlexRAG retriever" (commit 35991b0, verified).
# Name of the index backend in use.
# NOTE(review): presumably selects which of the annoy_config / faiss_config /
# scann_config sections is actually read -- confirm against the consumer.
index_type: faiss
# Options for the Annoy index backend.
# The fields are nested under annoy_config so they do not collide with the
# identically named keys (distance_function, batch_size, ...) of other sections.
annoy_config:
  distance_function: IP
  index_train_num: 1000000
  log_interval: 10000
  batch_size: 512
  # NOTE(review): -1 presumably means "let the library choose" for the three
  # values below -- confirm against the Annoy wrapper.
  n_trees: -1
  n_jobs: -1
  search_k: -1
  on_disk_build: false
# Options for the Faiss index backend.
# The fields are nested under faiss_config; in particular the inner index_type
# (auto) is a Faiss-level setting and must not overwrite the top-level
# index_type key.
faiss_config:
  distance_function: IP
  index_train_num: 1000000
  log_interval: 10000
  batch_size: 512
  index_type: auto
  n_subquantizers: 8
  n_bits: 8
  n_list: 1000
  factory_str: null
  n_probe: 32
  # Empty list. NOTE(review): presumably means "no GPU devices" -- confirm.
  device_id: []
  k_factor: 10
  polysemous_ht: 0
  efSearch: 100
# Options for the ScaNN index backend.
# The fields are nested under scann_config so they do not collide with the
# identically named keys (distance_function, batch_size, ...) of other sections.
scann_config:
  distance_function: IP
  index_train_num: 1000000
  log_interval: 10000
  batch_size: 512
  num_leaves: 2000
  num_leaves_to_search: 500
  num_neighbors: 10
  anisotropic_quantization_threshold: 0.2
  dimensions_per_block: 2
  threads: 0
# NOTE(review): presumably retriever-level settings rather than fields of the
# preceding scann_config section -- log_interval here is 100000, whereas every
# index section uses 10000. Confirm against the consumer's schema.
log_interval: 100000
top_k: 10
batch_size: 512
# Query preprocessing pipeline.
# Each *_config mapping is nested under the pipeline, and each tokenizer_config
# under the step that owns it, so the repeated keys (lang, max_chars,
# tokenizer_config, ...) no longer collide.
query_preprocess_pipeline:
  # Empty list. NOTE(review): presumably means no processor is enabled and the
  # per-processor sections below are defaults only -- confirm.
  processor_type: []
  length_filter_config:
    max_tokens: null
    min_tokens: null
    max_chars: null
    min_chars: null
    max_bytes: null
    min_bytes: null
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      lang: null
  token_normalize_config:
    lang: en
    penn: true
    norm_quote_commas: true
    norm_numbers: true
    pre_replace_unicode_punct: false
    post_remove_control_chars: false
    perl_parity: false
  truncate_config:
    max_chars: null
    max_bytes: null
    max_tokens: null
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      lang: null
# Absolute path on the original author's machine; it must be overridden when
# this configuration is used in another environment.
database_path: /data/zhangzhuocheng/Lab/Python/LLM/datasets/RAG/wikipedia/wiki_2021/contriever
# Encoder used for queries.
# encoder_type is hf here; the remaining mappings hold one settings section per
# encoder provider. Each is nested under its own key so the repeated field
# names (model_path, api_key, base_url, ...) do not collide.
# A value of ??? marks a mandatory field that has not been filled in.
query_encoder_config:
  encoder_type: hf
  cohere_config:
    model: embed-multilingual-v3.0
    input_type: search_document
    base_url: null
    api_key: ???
    proxy: null
  hf_config:
    model_path: facebook/contriever-msmarco
    tokenizer_path: null
    trust_remote_code: false
    device_id: []
    load_dtype: auto
    max_encode_length: 512
    encode_method: mean
    normalize: false
    prompt: ''
    task: ''
  hf_clip_config:
    model_path: ???
    tokenizer_path: null
    trust_remote_code: false
    device_id: []
    load_dtype: auto
    max_encode_length: 512
    normalize: false
    convert_to_rgb: false
  jina_config:
    model: jina-embeddings-v3
    base_url: https://api.jina.ai/v1/embeddings
    api_key: ???
    dimensions: 1024
    task: null
    proxy: null
  ollama_config:
    model_name: ???
    base_url: ???
    prompt: null
    verbose: false
    embedding_size: 768
    allow_parallel: true
  openai_config:
    is_azure: false
    model_name: ???
    base_url: null
    api_key: EMPTY
    api_version: 2024-07-01-preview
    verbose: false
    proxy: null
    dimension: null
  sentence_transformer_config:
    model_path: ???
    device_id: []
    trust_remote_code: false
    task: null
    prompt_name: null
    prompt: null
    prompt_dict: null
    normalize: false
    model_kwargs: {}
# Encoder used for passages.
# encoder_type is hf here; the remaining mappings hold one settings section per
# encoder provider. Each is nested under its own key so the repeated field
# names (model_path, api_key, base_url, ...) do not collide.
# A value of ??? marks a mandatory field that has not been filled in.
passage_encoder_config:
  encoder_type: hf
  cohere_config:
    model: embed-multilingual-v3.0
    input_type: search_document
    base_url: null
    api_key: ???
    proxy: null
  hf_config:
    model_path: facebook/contriever-msmarco
    tokenizer_path: null
    trust_remote_code: false
    device_id: []
    load_dtype: auto
    max_encode_length: 512
    encode_method: mean
    normalize: false
    prompt: ''
    task: ''
  hf_clip_config:
    model_path: ???
    tokenizer_path: null
    trust_remote_code: false
    device_id: []
    load_dtype: auto
    max_encode_length: 512
    normalize: false
    convert_to_rgb: false
  jina_config:
    model: jina-embeddings-v3
    base_url: https://api.jina.ai/v1/embeddings
    api_key: ???
    dimensions: 1024
    task: null
    proxy: null
  ollama_config:
    model_name: ???
    base_url: ???
    prompt: null
    verbose: false
    embedding_size: 768
    allow_parallel: true
  openai_config:
    is_azure: false
    model_name: ???
    base_url: null
    api_key: EMPTY
    api_version: 2024-07-01-preview
    verbose: false
    proxy: null
    dimension: null
  sentence_transformer_config:
    model_path: ???
    device_id: []
    trust_remote_code: false
    task: null
    prompt_name: null
    prompt: null
    prompt_dict: null
    normalize: false
    model_kwargs: {}
# Ordered list of field names.
# NOTE(review): presumably the document fields that are encoded for each
# passage -- confirm against the consumer.
encode_fields:
  - title
  - section
  - text