Spaces: Running on Zero
kwabs22 committed
Commit · 276706e · 1 Parent(s): 3976009
image model gated

Browse files: leveraging_machine_learning.py (+31 -31)
leveraging_machine_learning.py CHANGED
@@ -20,12 +20,12 @@ modelnames = ["stvlynn/Gemma-2-2b-Chinese-it", "unsloth/Llama-3.2-1B-Instruct",
     "Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2-0.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-7B-Instruct", "Qwen/Qwen1.5-MoE-A2.7B-Chat", "HuggingFaceTB/SmolLM-135M-Instruct", "microsoft/Phi-3-mini-4k-instruct", "Groq/Llama-3-Groq-8B-Tool-Use", "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4",
     "SpectraSuite/TriLM_3.9B_Unpacked", "h2oai/h2o-danube3-500m-chat", "OuteAI/Lite-Mistral-150M-v2-Instruct", "Zyphra/Zamba2-1.2B", "anthracite-org/magnum-v2-4b", ]

-imagemodelnames = ["black-forest-labs/FLUX.1-schnell"]
+# imagemodelnames = ["black-forest-labs/FLUX.1-schnell"]

 current_model_index = 0
 current_image_model_index = 0
 modelname = modelnames[current_model_index]
-imagemodelname = imagemodelnames[current_image_model_index]
+# imagemodelname = imagemodelnames[current_image_model_index]
 lastmodelnameinloadfunction = None
 lastimagemodelnameinloadfunction = None

@@ -35,7 +35,7 @@ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 # Initialize model and tokenizer as global variables
 model = None
 tokenizer = None
-flux_pipe = None
+# flux_pipe = None

 # Dictionary to store loaded models
 loaded_models = {}
@@ -90,42 +90,42 @@ def load_model(model_name):
             f"Tokenizer size: {get_size_str(tokenizer_size)}, "
             f"GPU memory used: {get_size_str(memory_used)}")

-def load_image_model(imagemodelname):
-    global flux_pipe, lastimagemodelnameinloadfunction, loaded_models
+# def load_image_model(imagemodelname):
+# global flux_pipe, lastimagemodelnameinloadfunction, loaded_models

-    print(f"Loading image model: {imagemodelname}")
+# print(f"Loading image model: {imagemodelname}")

-    # Record initial GPU memory usage
-    initial_memory = torch.cuda.memory_allocated()
+# # Record initial GPU memory usage
+# initial_memory = torch.cuda.memory_allocated()

-    if 'flux_pipe' in globals() and flux_pipe is not None:
-        flux_pipe = None
+# if 'flux_pipe' in globals() and flux_pipe is not None:
+# flux_pipe = None

-    torch.cuda.empty_cache()
-    gc.collect()
+# torch.cuda.empty_cache()
+# gc.collect()

-    flux_pipe = FluxPipeline.from_pretrained(imagemodelname, torch_dtype=torch.bfloat16)
-    flux_pipe.enable_model_cpu_offload()
-    model_size = sum(p.numel() * p.element_size() for p in flux_pipe.transformer.parameters())
-    #tokenizer_size = 0 # FLUX doesn't use a separate tokenizer
-    loaded_models[imagemodelname] = flux_pipe
+# flux_pipe = FluxPipeline.from_pretrained(imagemodelname, torch_dtype=torch.bfloat16)
+# flux_pipe.enable_model_cpu_offload()
+# model_size = sum(p.numel() * p.element_size() for p in flux_pipe.transformer.parameters())
+# #tokenizer_size = 0 # FLUX doesn't use a separate tokenizer
+# loaded_models[imagemodelname] = flux_pipe

-    # Calculate memory usage
-    final_memory = torch.cuda.memory_allocated()
-    memory_used = final_memory - initial_memory
+# # Calculate memory usage
+# final_memory = torch.cuda.memory_allocated()
+# memory_used = final_memory - initial_memory

-    loaded_models[imagemodelname] = [str(time.time()), memory_used]
+# loaded_models[imagemodelname] = [str(time.time()), memory_used]

-    lastimagemodelnameinloadfunction = (imagemodelname, model_size) #, tokenizer_size)
-    print(f"Model and tokenizer {imagemodelname} loaded successfully")
-    print(f"Model size: {get_size_str(model_size)}")
-    #print(f"Tokenizer size: {get_size_str(tokenizer_size)}")
-    print(f"GPU memory used: {get_size_str(memory_used)}")
+# lastimagemodelnameinloadfunction = (imagemodelname, model_size) #, tokenizer_size)
+# print(f"Model and tokenizer {imagemodelname} loaded successfully")
+# print(f"Model size: {get_size_str(model_size)}")
+# #print(f"Tokenizer size: {get_size_str(tokenizer_size)}")
+# print(f"GPU memory used: {get_size_str(memory_used)}")

-    return (f"Model and tokenizer {imagemodelname} loaded successfully. "
-            f"Model size: {get_size_str(model_size)}, "
-            #f"Tokenizer size: {get_size_str(tokenizer_size)}, "
-            f"GPU memory used: {get_size_str(memory_used)}")
+# return (f"Model and tokenizer {imagemodelname} loaded successfully. "
+# f"Model size: {get_size_str(model_size)}, "
+# #f"Tokenizer size: {get_size_str(tokenizer_size)}, "
+# f"GPU memory used: {get_size_str(memory_used)}")


 def clear_all_models():
@@ -160,7 +160,7 @@ def loaded_model_list():

 # Initial model load
 load_model(modelname)
-load_image_model(imagemodelname)
+# load_image_model(imagemodelname)

 # Create embeddings for the knowledge base
 knowledge_base_embeddings = embedding_model.encode([doc["content"] for doc in knowledge_base])
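
For reference, below is the gated loader reassembled from the commented-out lines above into a standalone sketch. The imports (gc, time, torch, diffusers.FluxPipeline) and the get_size_str helper are assumptions inferred from the calls the snippet makes, since both live elsewhere in leveraging_machine_learning.py. The original also assigns loaded_models[imagemodelname] twice (first the pipeline, then a [timestamp, memory] pair); the sketch keeps only the second, surviving assignment.

import gc
import time

import torch
from diffusers import FluxPipeline

flux_pipe = None
lastimagemodelnameinloadfunction = None
loaded_models = {}


def get_size_str(num_bytes):
    # Hypothetical stand-in for the helper defined elsewhere in the file.
    for unit in ("B", "KB", "MB", "GB"):
        if num_bytes < 1024:
            return f"{num_bytes:.2f} {unit}"
        num_bytes /= 1024
    return f"{num_bytes:.2f} TB"


def load_image_model(imagemodelname):
    global flux_pipe, lastimagemodelnameinloadfunction, loaded_models

    print(f"Loading image model: {imagemodelname}")

    # Record initial GPU memory usage so the load's delta can be reported.
    initial_memory = torch.cuda.memory_allocated()

    # Release any previously loaded pipeline before loading a new one.
    if flux_pipe is not None:
        flux_pipe = None
    torch.cuda.empty_cache()
    gc.collect()

    # Load in bfloat16; CPU offload trades speed for a smaller resident
    # GPU footprint.
    flux_pipe = FluxPipeline.from_pretrained(imagemodelname, torch_dtype=torch.bfloat16)
    flux_pipe.enable_model_cpu_offload()
    model_size = sum(p.numel() * p.element_size() for p in flux_pipe.transformer.parameters())

    # Report the GPU memory attributable to this load.
    final_memory = torch.cuda.memory_allocated()
    memory_used = final_memory - initial_memory
    loaded_models[imagemodelname] = [str(time.time()), memory_used]

    lastimagemodelnameinloadfunction = (imagemodelname, model_size)
    print(f"Image model {imagemodelname} loaded successfully")
    print(f"Model size: {get_size_str(model_size)}")
    print(f"GPU memory used: {get_size_str(memory_used)}")
    return (f"Image model {imagemodelname} loaded successfully. "
            f"Model size: {get_size_str(model_size)}, "
            f"GPU memory used: {get_size_str(memory_used)}")

Note that enable_model_cpu_offload() keeps weights on the CPU until each sub-module is actually used, so the torch.cuda.memory_allocated() delta measured right after loading will understate the pipeline's working GPU footprint.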