Merge pull request #40 from DL4DS/remove_tinyllama
Remove TinyLlama from LFS and add caching mechanism
code/modules/chat/chat_model_loader.py
CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
 import torch
 import transformers
 import os
+from pathlib import Path
+from huggingface_hub import hf_hub_download
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
@@ -14,6 +16,14 @@ class ChatModelLoader:
         self.config = config
         self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+    def _verify_model_cache(self, model_cache_path):
+        hf_hub_download(
+            repo_id=self.config["llm_params"]["local_llm_params"]["repo_id"],
+            filename=self.config["llm_params"]["local_llm_params"]["filename"],
+            cache_dir=model_cache_path
+        )
+        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
+
     def load_chat_model(self):
         if self.config["llm_params"]["llm_loader"] == "openai":
             llm = ChatOpenAI(
@@ -21,7 +31,7 @@ class ChatModelLoader:
             )
         elif self.config["llm_params"]["llm_loader"] == "local_llm":
            n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-            model_path = self.config["llm_params"]["local_llm_params"]["model"]
+            model_path = self._verify_model_cache(self.config["llm_params"]["local_llm_params"]["model"])
            llm = LlamaCpp(
                model_path=model_path,
                n_batch=n_batch,
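For context, a minimal standalone sketch of the caching flow this change introduces, assuming a Hub repo id, GGUF filename, and cache directory like the values added to the config below; hf_hub_download skips the network call when the file is already present in cache_dir and returns the resolved local path:

from pathlib import Path
from huggingface_hub import hf_hub_download

def resolve_gguf(repo_id: str, filename: str, cache_dir: str) -> str:
    # Download the GGUF file into cache_dir (a no-op if it is already cached)
    # under the Hub layout <cache_dir>/models--<org>--<name>/snapshots/<rev>/<filename>.
    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
    return str(local_path)

# Example values mirror the new config keys; the cache directory is illustrative.
print(resolve_gguf(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
    cache_dir="../storage/models/tinyllama",
))

Since hf_hub_download already returns the path of the cached file, the glob over */snapshots/*/*.gguf in _verify_model_cache could in principle be replaced by its return value; the glob simply re-discovers the same file from the cache layout.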
code/modules/config/config.yml
CHANGED
@@ -34,6 +34,8 @@ llm_params:
   local_llm_params:
     model: 'tiny-llama'
     temperature: 0.7
+    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
+    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
   pdf_reader: 'llama' # str [llama, pymupdf, gpt]
 
 chat_logging:
@@ -50,4 +52,4 @@ splitter_options:
   chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
   front_chunks_to_remove : null # int or None
   last_chunks_to_remove : null # int or None
-  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
+  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
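As a quick check, the new keys can be read back from this file with PyYAML (the loader's actual config plumbing may differ; this is just a sketch against the path shown in this diff):

import yaml

with open("code/modules/config/config.yml") as f:
    config = yaml.safe_load(f)

local = config["llm_params"]["local_llm_params"]
print(local["repo_id"])   # 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'
print(local["filename"])  # 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'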
code/modules/config/constants.py
CHANGED
@@ -86,5 +86,5 @@ Question: {question}
 
 # Model Paths
 
-LLAMA_PATH = "../storage/models/tinyllama
+LLAMA_PATH = "../storage/models/tinyllama"
 MISTRAL_PATH = "storage/models/mistral-7b-v0.1.Q4_K_M.gguf"
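The repaired LLAMA_PATH string lines up with a local cache directory; assuming it is the directory handed to _verify_model_cache (an assumption, not stated in this diff), any already-cached GGUF can be located under the Hub snapshot layout like so:

from pathlib import Path

LLAMA_PATH = "../storage/models/tinyllama"  # as defined in constants.py above
# Hypothetical check: list GGUF files already cached under the Hub layout.
cached = list(Path(LLAMA_PATH).glob("*/snapshots/*/*.gguf"))
print(cached[0] if cached else "no cached GGUF yet")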