Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -74,7 +74,7 @@ def calculate_cache_size(cache):
|
|
74 |
return total_memory /(1024*1024)
|
75 |
|
76 |
@st.cache_resource
|
77 |
-
def load_quantized_model_and_tokenizer():
|
78 |
model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Configure quantization for 4-bit loading
|
79 |
quantization_config = BitsAndBytesConfig(
|
80 |
load_in_4bit=True, # Enable 4-bit quantization
|
@@ -87,13 +87,13 @@ def load_quantized_model_and_tokenizer():
|
|
87 |
model_name,
|
88 |
device_map="auto", # Automatically allocate model to devices
|
89 |
quantization_config=quantization_config,
|
90 |
-
|
91 |
)
|
92 |
|
93 |
# Load the tokenizer
|
94 |
tokenizer = AutoTokenizer.from_pretrained(
|
95 |
model_name,
|
96 |
-
|
97 |
)
|
98 |
return tokenizer, model
|
99 |
|
|
|
74 |
return total_memory /(1024*1024)
|
75 |
|
76 |
@st.cache_resource
|
77 |
+
def load_model_and_tokenizer(doc_text_count):
|
78 |
model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Configure quantization for 4-bit loading
|
79 |
quantization_config = BitsAndBytesConfig(
|
80 |
load_in_4bit=True, # Enable 4-bit quantization
|
|
|
87 |
model_name,
|
88 |
device_map="auto", # Automatically allocate model to devices
|
89 |
quantization_config=quantization_config,
|
90 |
+
model_max_length=1.3*round(doc_text_count * 0.3 + 1)
|
91 |
)
|
92 |
|
93 |
# Load the tokenizer
|
94 |
tokenizer = AutoTokenizer.from_pretrained(
|
95 |
model_name,
|
96 |
+
|
97 |
)
|
98 |
return tokenizer, model
|
99 |
|