|
index_type: faiss |
|
annoy_config: |
|
distance_function: IP |
|
index_train_num: 1000000 |
|
log_interval: 10000 |
|
batch_size: 512 |
|
n_trees: -1 |
|
n_jobs: -1 |
|
search_k: -1 |
|
on_disk_build: false |
|
faiss_config: |
|
distance_function: IP |
|
index_train_num: 1000000 |
|
log_interval: 10000 |
|
batch_size: 512 |
|
index_type: auto |
|
n_subquantizers: 8 |
|
n_bits: 8 |
|
n_list: 1000 |
|
factory_str: null |
|
n_probe: 32 |
|
device_id: [] |
|
k_factor: 10 |
|
polysemous_ht: 0 |
|
efSearch: 100 |
|
scann_config: |
|
distance_function: IP |
|
index_train_num: 1000000 |
|
log_interval: 10000 |
|
batch_size: 512 |
|
num_leaves: 2000 |
|
num_leaves_to_search: 500 |
|
num_neighbors: 10 |
|
anisotropic_quantization_threshold: 0.2 |
|
dimensions_per_block: 2 |
|
threads: 0 |
|
log_interval: 100000 |
|
top_k: 10 |
|
batch_size: 512 |
|
query_preprocess_pipeline: |
|
processor_type: [] |
|
length_filter_config: |
|
max_tokens: null |
|
min_tokens: null |
|
max_chars: null |
|
min_chars: null |
|
max_bytes: null |
|
min_bytes: null |
|
tokenizer_config: |
|
tokenizer_type: moses |
|
hf_tokenizer_path: null |
|
tiktok_tokenizer_name: null |
|
lang: null |
|
token_normalize_config: |
|
lang: en |
|
penn: true |
|
norm_quote_commas: true |
|
norm_numbers: true |
|
pre_replace_unicode_punct: false |
|
post_remove_control_chars: false |
|
perl_parity: false |
|
truncate_config: |
|
max_chars: null |
|
max_bytes: null |
|
max_tokens: null |
|
tokenizer_config: |
|
tokenizer_type: moses |
|
hf_tokenizer_path: null |
|
tiktok_tokenizer_name: null |
|
lang: null |
|
database_path: /data/zhangzhuocheng/Lab/Python/LLM/datasets/RAG/wikipedia/wiki_2021/contriever |
|
query_encoder_config: |
|
encoder_type: hf |
|
cohere_config: |
|
model: embed-multilingual-v3.0 |
|
input_type: search_document |
|
base_url: null |
|
api_key: ??? |
|
proxy: null |
|
hf_config: |
|
model_path: facebook/contriever-msmarco |
|
tokenizer_path: null |
|
trust_remote_code: false |
|
device_id: [] |
|
load_dtype: auto |
|
max_encode_length: 512 |
|
encode_method: mean |
|
normalize: false |
|
prompt: '' |
|
task: '' |
|
hf_clip_config: |
|
model_path: ??? |
|
tokenizer_path: null |
|
trust_remote_code: false |
|
device_id: [] |
|
load_dtype: auto |
|
max_encode_length: 512 |
|
normalize: false |
|
convert_to_rgb: false |
|
jina_config: |
|
model: jina-embeddings-v3 |
|
base_url: https://api.jina.ai/v1/embeddings |
|
api_key: ??? |
|
dimensions: 1024 |
|
task: null |
|
proxy: null |
|
ollama_config: |
|
model_name: ??? |
|
base_url: ??? |
|
prompt: null |
|
verbose: false |
|
embedding_size: 768 |
|
allow_parallel: true |
|
openai_config: |
|
is_azure: false |
|
model_name: ??? |
|
base_url: null |
|
api_key: EMPTY |
|
api_version: 2024-07-01-preview |
|
verbose: false |
|
proxy: null |
|
dimension: null |
|
sentence_transformer_config: |
|
model_path: ??? |
|
device_id: [] |
|
trust_remote_code: false |
|
task: null |
|
prompt_name: null |
|
prompt: null |
|
prompt_dict: null |
|
normalize: false |
|
model_kwargs: {} |
|
passage_encoder_config: |
|
encoder_type: hf |
|
cohere_config: |
|
model: embed-multilingual-v3.0 |
|
input_type: search_document |
|
base_url: null |
|
api_key: ??? |
|
proxy: null |
|
hf_config: |
|
model_path: facebook/contriever-msmarco |
|
tokenizer_path: null |
|
trust_remote_code: false |
|
device_id: [] |
|
load_dtype: auto |
|
max_encode_length: 512 |
|
encode_method: mean |
|
normalize: false |
|
prompt: '' |
|
task: '' |
|
hf_clip_config: |
|
model_path: ??? |
|
tokenizer_path: null |
|
trust_remote_code: false |
|
device_id: [] |
|
load_dtype: auto |
|
max_encode_length: 512 |
|
normalize: false |
|
convert_to_rgb: false |
|
jina_config: |
|
model: jina-embeddings-v3 |
|
base_url: https://api.jina.ai/v1/embeddings |
|
api_key: ??? |
|
dimensions: 1024 |
|
task: null |
|
proxy: null |
|
ollama_config: |
|
model_name: ??? |
|
base_url: ??? |
|
prompt: null |
|
verbose: false |
|
embedding_size: 768 |
|
allow_parallel: true |
|
openai_config: |
|
is_azure: false |
|
model_name: ??? |
|
base_url: null |
|
api_key: EMPTY |
|
api_version: 2024-07-01-preview |
|
verbose: false |
|
proxy: null |
|
dimension: null |
|
sentence_transformer_config: |
|
model_path: ??? |
|
device_id: [] |
|
trust_remote_code: false |
|
task: null |
|
prompt_name: null |
|
prompt: null |
|
prompt_dict: null |
|
normalize: false |
|
model_kwargs: {} |
|
encode_fields: |
|
- title |
|
- section |
|
- text |
|
|