Spaces:
Runtime error
Runtime error
File size: 5,749 Bytes
1c60c6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# coding=utf-8
# Copyright 2021-present, the Recognai S.L. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Common environment vars / settings
"""
import logging
from typing import List, Optional
from urllib.parse import urlparse
from pydantic import BaseSettings, Field
from argilla._constants import DEFAULT_MAX_KEYWORD_LENGTH
class ApiSettings(BaseSettings):
    """
    Main api settings. The pydantic BaseSettings class makes
    accessible environment variables by setting attributes.

    See <https://pydantic-docs.helpmanual.io/usage/settings/>

    Fields listed in ``Config.fields`` can be set through either the legacy
    un-prefixed env var (e.g. ``ELASTICSEARCH``) or the ``ARGILLA_`` prefixed
    one (e.g. ``ARGILLA_ELASTICSEARCH``). The remaining fields rely on
    pydantic's default lookup, which applies ``Config.env_prefix``.

    elasticsearch: (ELASTICSEARCH / ARGILLA_ELASTICSEARCH env var)
        The elasticsearch endpoint for datasets persistence

    elasticsearch_ssl_verify: (ELASTICSEARCH_SSL_VERIFY / ARGILLA_ELASTICSEARCH_SSL_VERIFY env var)
        Boolean flag, default=True. Presumably toggles SSL certificate
        verification for the elasticsearch connection (consumer not shown here).

    elasticsearch_ca_path:
        Optional path, default=None. Presumably a CA bundle used for the
        elasticsearch connection (consumer not shown here).

    cors_origins: (CORS_ORIGINS / ARGILLA_CORS_ORIGINS env var)
        List of host patterns for CORS origin access

    docs_enabled: (DOCS_ENABLED / ARGILLA_DOCS_ENABLED env var)
        If True, enable openapi docs endpoint at /api/docs

    namespace:
        Optional lowercase-letters-only prefix (regex ``^[a-z]+$``) prepended
        to the index names exposed by this class.

    enable_migration:
        If enabled, try to migrate data from old rubrix installation. Default=False

    es_records_index_shards: (ES_RECORDS_INDEX_SHARDS / ARGILLA_ES_RECORDS_INDEX_SHARDS env var)
        Configures the number of shards for dataset records index creation. Default=1

    es_records_index_replicas: (ES_RECORDS_INDEX_REPLICAS / ARGILLA_ES_RECORDS_INDEX_REPLICAS env var)
        Configures the number of shard replicas for dataset records index creation. Default=0

    metadata_fields_limit:
        Max number of fields in metadata (0 < value <= 100). Default=50

    metadata_field_length:
        Max length supported for the string metadata fields.
        Default=DEFAULT_MAX_KEYWORD_LENGTH
    """

    __LOGGER__ = logging.getLogger(__name__)

    # Base index names; the records one carries a "{}" placeholder that
    # callers are expected to format.
    __DATASETS_INDEX_NAME__ = "ar.datasets"
    __DATASETS_RECORDS_INDEX_NAME__ = "ar.dataset.{}"

    elasticsearch: str = "http://localhost:9200"
    elasticsearch_ssl_verify: bool = True
    elasticsearch_ca_path: Optional[str] = None
    cors_origins: List[str] = ["*"]

    docs_enabled: bool = True

    # The default is None, so the annotation is Optional; the regex only
    # constrains values that are actually provided.
    namespace: Optional[str] = Field(default=None, regex=r"^[a-z]+$")

    enable_migration: bool = Field(
        default=False,
        description="If enabled, try to migrate data from old rubrix installation",
    )

    # Analyzer configuration
    default_es_search_analyzer: str = "standard"
    exact_es_search_analyzer: str = "whitespace"
    # This line will be enabled once words field won't be used anymore
    # wordcloud_es_search_analyzer: str = "multilingual_stop_analyzer"

    es_records_index_shards: int = 1
    es_records_index_replicas: int = 0

    metadata_fields_limit: int = Field(
        default=50,
        gt=0,
        le=100,
        description="Max number of fields in metadata",
    )
    metadata_field_length: int = Field(
        default=DEFAULT_MAX_KEYWORD_LENGTH,
        description="Max length supported for the string metadata fields."
        " Values containing higher than this will be truncated",
    )

    enable_telemetry: bool = True
    telemetry_key: Optional[str] = None

    @property
    def dataset_index_name(self) -> str:
        """Name of the datasets index, prefixed with ``<namespace>.`` when a namespace is set."""
        ns = self.namespace
        if ns:
            return f"{ns}.{self.__DATASETS_INDEX_NAME__}"
        return self.__DATASETS_INDEX_NAME__

    @property
    def dataset_records_index_name(self) -> str:
        """
        Name pattern of the per-dataset records index (still containing the
        ``{}`` placeholder), prefixed with ``<namespace>.`` when a namespace is set.
        """
        ns = self.namespace
        if ns:
            return f"{ns}.{self.__DATASETS_RECORDS_INDEX_NAME__}"
        return self.__DATASETS_RECORDS_INDEX_NAME__

    @property
    def old_dataset_index_name(self) -> str:
        """Legacy rubrix datasets index name, with ``.<namespace>`` inserted when a namespace is set."""
        index_name = ".rubrix<NAMESPACE>.datasets-v0"
        ns = self.namespace
        # Same emptiness test as the non-legacy properties, so an empty
        # namespace never yields a name with a doubled dot.
        if not ns:
            return index_name.replace("<NAMESPACE>", "")
        return index_name.replace("<NAMESPACE>", f".{ns}")

    @property
    def old_dataset_records_index_name(self) -> str:
        """
        Legacy rubrix records index name pattern (still containing the ``{}``
        placeholder), with ``.<namespace>`` inserted when a namespace is set.
        """
        index_name = ".rubrix<NAMESPACE>.dataset.{}.records-v0"
        ns = self.namespace
        if not ns:
            return index_name.replace("<NAMESPACE>", "")
        return index_name.replace("<NAMESPACE>", f".{ns}")

    def obfuscated_elasticsearch(self) -> str:
        """Returns configured elasticsearch url obfuscating the provided password, if any"""
        url = self.elasticsearch
        password = urlparse(url).password
        if not password:
            return url

        # Mask only the credential segment (":<password>@"). A plain
        # replace of the password text would also rewrite any other part of
        # the url (user, host, path) that happens to contain the same text.
        credential = f":{password}@"
        if credential in url:
            return url.replace(credential, ":XXXX@", 1)
        # Fallback: the parsed password could not be located verbatim as a
        # credential segment; keep the previous best-effort masking.
        return url.replace(password, "XXXX")

    class Config:
        # TODO: include a common prefix for all argilla env vars.
        env_prefix = "ARGILLA_"

        # Explicit env names: each entry accepts the legacy un-prefixed
        # variable and the prefixed one.
        fields = {
            # TODO(@frascuchon): Remove in 0.20.0
            "elasticsearch": {
                "env": ["ELASTICSEARCH", f"{env_prefix}ELASTICSEARCH"],
            },
            "elasticsearch_ssl_verify": {
                "env": [
                    "ELASTICSEARCH_SSL_VERIFY",
                    f"{env_prefix}ELASTICSEARCH_SSL_VERIFY",
                ]
            },
            "cors_origins": {"env": ["CORS_ORIGINS", f"{env_prefix}CORS_ORIGINS"]},
            "docs_enabled": {"env": ["DOCS_ENABLED", f"{env_prefix}DOCS_ENABLED"]},
            "es_records_index_shards": {
                "env": [
                    "ES_RECORDS_INDEX_SHARDS",
                    f"{env_prefix}ES_RECORDS_INDEX_SHARDS",
                ]
            },
            "es_records_index_replicas": {
                "env": [
                    "ES_RECORDS_INDEX_REPLICAS",
                    # Was "..._SHARDS": the shards variable silently set the
                    # replica count and the replicas variable was ignored.
                    f"{env_prefix}ES_RECORDS_INDEX_REPLICAS",
                ]
            },
        }
# Module-level settings instance, created once when this module is imported.
# ApiSettings is a pydantic BaseSettings, so field values are resolved from
# the environment at this point.
settings = ApiSettings()
|