Spaces:
Sleeping
Sleeping
Update constants/models.py
Browse files
- constants/models.py +11 -0
constants/models.py
CHANGED
@@ -12,6 +12,7 @@ MODEL_MAP = {
|
|
12 |
"Hermes-3-Llama-3.1-8B": "NousResearch/Hermes-3-Llama-3.1-8B",
|
13 |
"Mistral-Nemo-Instruct-2407": "mistralai/Mistral-Nemo-Instruct-2407",
|
14 |
"Phi-3.5-mini-instruct": "microsoft/Phi-3.5-mini-instruct",
|
|
|
15 |
"nous-mixtral-8x7b": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
|
16 |
"mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",
|
17 |
"yi-1.5-34b": "01-ai/Yi-1.5-34B-Chat",
|
@@ -53,6 +54,7 @@ STOP_SEQUENCES_MAP = {
|
|
53 |
"Qwen2.5-72B-Instruct":"<|im_end|>",
|
54 |
# https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B/blob/main/tokenizer_config.json
|
55 |
"DeepSeek-R1-Distill-Qwen-32B":"<|end▁of▁sentence|>",
|
|
|
56 |
|
57 |
# "openchat-3.5": "<|end_of_turn|>",
|
58 |
# "command-r-plus": "<|END_OF_TURN_TOKEN|>",
|
@@ -68,6 +70,7 @@ TOKEN_LIMIT_MAP = {
|
|
68 |
"c4ai-command-r-plus-08-2024":1000000000000000019884624838656,
|
69 |
"Qwen2.5-72B-Instruct":131072,
|
70 |
"DeepSeek-R1-Distill-Qwen-32B":131072,
|
|
|
71 |
|
72 |
# "openchat-3.5": 8192,
|
73 |
# "command-r-plus": 32768,
|
@@ -109,6 +112,14 @@ AVAILABLE_MODELS_DICTS = [
|
|
109 |
"created": 1700000000,
|
110 |
"owned_by": "deepseek-ai"
|
111 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
{
|
113 |
"id": "Llama-3.1-Nemotron-70B-Instruct-HF",
|
114 |
"description": "[nvidia/Llama-3.1-Nemotron-70B-Instruct-HF]: https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
|
|
|
12 |
"Hermes-3-Llama-3.1-8B": "NousResearch/Hermes-3-Llama-3.1-8B",
|
13 |
"Mistral-Nemo-Instruct-2407": "mistralai/Mistral-Nemo-Instruct-2407",
|
14 |
"Phi-3.5-mini-instruct": "microsoft/Phi-3.5-mini-instruct",
|
15 |
+
"DeepSeek-V3-0324": "deepseek-ai/DeepSeek-V3-0324",
|
16 |
"nous-mixtral-8x7b": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
|
17 |
"mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",
|
18 |
"yi-1.5-34b": "01-ai/Yi-1.5-34B-Chat",
|
|
|
54 |
"Qwen2.5-72B-Instruct":"<|im_end|>",
|
55 |
# https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B/blob/main/tokenizer_config.json
|
56 |
"DeepSeek-R1-Distill-Qwen-32B":"<|end▁of▁sentence|>",
|
57 |
+
"DeepSeek-V3-0324":"<|end▁of▁sentence|>",
|
58 |
|
59 |
# "openchat-3.5": "<|end_of_turn|>",
|
60 |
# "command-r-plus": "<|END_OF_TURN_TOKEN|>",
|
|
|
70 |
"c4ai-command-r-plus-08-2024":1000000000000000019884624838656,
|
71 |
"Qwen2.5-72B-Instruct":131072,
|
72 |
"DeepSeek-R1-Distill-Qwen-32B":131072,
|
73 |
+
"DeepSeek-V3-0324":131072,
|
74 |
|
75 |
# "openchat-3.5": 8192,
|
76 |
# "command-r-plus": 32768,
|
|
|
112 |
"created": 1700000000,
|
113 |
"owned_by": "deepseek-ai"
|
114 |
},
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"id": "DeepSeek-V3-0324",
|
118 |
+
"description": "[deepseek-ai/DeepSeek-V3-0324]: https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
|
119 |
+
"object": "model",
|
120 |
+
"created": 1700000000,
|
121 |
+
"owned_by": "deepseek-ai"
|
122 |
+
},
|
123 |
{
|
124 |
"id": "Llama-3.1-Nemotron-70B-Instruct-HF",
|
125 |
"description": "[nvidia/Llama-3.1-Nemotron-70B-Instruct-HF]: https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
|