kwabs22 committed
Commit 276706e · Parent(s): 3976009

image model gated

Files changed (1)
  1. leveraging_machine_learning.py +31 -31
leveraging_machine_learning.py CHANGED
@@ -20,12 +20,12 @@ modelnames = ["stvlynn/Gemma-2-2b-Chinese-it", "unsloth/Llama-3.2-1B-Instruct",
               "Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2-0.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-7B-Instruct", "Qwen/Qwen1.5-MoE-A2.7B-Chat", "HuggingFaceTB/SmolLM-135M-Instruct", "microsoft/Phi-3-mini-4k-instruct", "Groq/Llama-3-Groq-8B-Tool-Use", "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4",
               "SpectraSuite/TriLM_3.9B_Unpacked", "h2oai/h2o-danube3-500m-chat", "OuteAI/Lite-Mistral-150M-v2-Instruct", "Zyphra/Zamba2-1.2B", "anthracite-org/magnum-v2-4b", ]
 
-imagemodelnames = ["black-forest-labs/FLUX.1-schnell"]
+# imagemodelnames = ["black-forest-labs/FLUX.1-schnell"]
 
 current_model_index = 0
 current_image_model_index = 0
 modelname = modelnames[current_model_index]
-imagemodelname = imagemodelnames[current_image_model_index]
+# imagemodelname = imagemodelnames[current_image_model_index]
 lastmodelnameinloadfunction = None
 lastimagemodelnameinloadfunction = None
 
@@ -35,7 +35,7 @@ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 # Initialize model and tokenizer as global variables
 model = None
 tokenizer = None
-flux_pipe = None
+# flux_pipe = None
 
 # Dictionary to store loaded models
 loaded_models = {}
@@ -90,42 +90,42 @@ def load_model(model_name):
             f"Tokenizer size: {get_size_str(tokenizer_size)}, "
             f"GPU memory used: {get_size_str(memory_used)}")
 
-def load_image_model(imagemodelname):
-    global flux_pipe, lastimagemodelnameinloadfunction, loaded_models
+# def load_image_model(imagemodelname):
+#     global flux_pipe, lastimagemodelnameinloadfunction, loaded_models
 
-    print(f"Loading image model: {imagemodelname}")
+#     print(f"Loading image model: {imagemodelname}")
 
-    # Record initial GPU memory usage
-    initial_memory = torch.cuda.memory_allocated()
+#     # Record initial GPU memory usage
+#     initial_memory = torch.cuda.memory_allocated()
 
-    if 'flux_pipe' in globals() and flux_pipe is not None:
-        flux_pipe = None
+#     if 'flux_pipe' in globals() and flux_pipe is not None:
+#         flux_pipe = None
 
-    torch.cuda.empty_cache()
-    gc.collect()
+#     torch.cuda.empty_cache()
+#     gc.collect()
 
-    flux_pipe = FluxPipeline.from_pretrained(imagemodelname, torch_dtype=torch.bfloat16)
-    flux_pipe.enable_model_cpu_offload()
-    model_size = sum(p.numel() * p.element_size() for p in flux_pipe.transformer.parameters())
-    #tokenizer_size = 0 # FLUX doesn't use a separate tokenizer
-    loaded_models[imagemodelname] = flux_pipe
+#     flux_pipe = FluxPipeline.from_pretrained(imagemodelname, torch_dtype=torch.bfloat16)
+#     flux_pipe.enable_model_cpu_offload()
+#     model_size = sum(p.numel() * p.element_size() for p in flux_pipe.transformer.parameters())
+#     #tokenizer_size = 0 # FLUX doesn't use a separate tokenizer
+#     loaded_models[imagemodelname] = flux_pipe
 
-    # Calculate memory usage
-    final_memory = torch.cuda.memory_allocated()
-    memory_used = final_memory - initial_memory
+#     # Calculate memory usage
+#     final_memory = torch.cuda.memory_allocated()
+#     memory_used = final_memory - initial_memory
 
-    loaded_models[imagemodelname] = [str(time.time()), memory_used]
+#     loaded_models[imagemodelname] = [str(time.time()), memory_used]
 
-    lastimagemodelnameinloadfunction = (imagemodelname, model_size) #, tokenizer_size)
-    print(f"Model and tokenizer {imagemodelname} loaded successfully")
-    print(f"Model size: {get_size_str(model_size)}")
-    #print(f"Tokenizer size: {get_size_str(tokenizer_size)}")
-    print(f"GPU memory used: {get_size_str(memory_used)}")
+#     lastimagemodelnameinloadfunction = (imagemodelname, model_size) #, tokenizer_size)
+#     print(f"Model and tokenizer {imagemodelname} loaded successfully")
+#     print(f"Model size: {get_size_str(model_size)}")
+#     #print(f"Tokenizer size: {get_size_str(tokenizer_size)}")
+#     print(f"GPU memory used: {get_size_str(memory_used)}")
 
-    return (f"Model and tokenizer {imagemodelname} loaded successfully. "
-            f"Model size: {get_size_str(model_size)}, "
-            #f"Tokenizer size: {get_size_str(tokenizer_size)}, "
-            f"GPU memory used: {get_size_str(memory_used)}")
+#     return (f"Model and tokenizer {imagemodelname} loaded successfully. "
+#             f"Model size: {get_size_str(model_size)}, "
+#             #f"Tokenizer size: {get_size_str(tokenizer_size)}, "
+#             f"GPU memory used: {get_size_str(memory_used)}")
 
 
 def clear_all_models():
@@ -160,7 +160,7 @@ def loaded_model_list():
 
 # Initial model load
 load_model(modelname)
-load_image_model(imagemodelname)
+# load_image_model(imagemodelname)
 
 # Create embeddings for the knowledge base
 knowledge_base_embeddings = embedding_model.encode([doc["content"] for doc in knowledge_base])
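For reference, the disabled loader sized the pipeline two ways: summing parameter bytes over flux_pipe.transformer, and diffing torch.cuda.memory_allocated() before and after the load. A minimal standalone sketch of that before/after pattern, assuming any zero-argument load_fn that places a model on the GPU (the load_with_memory_report helper is hypothetical, not part of the file):

import gc
import torch

def load_with_memory_report(load_fn):
    # Free unreferenced tensors and cached allocator blocks first,
    # so the before/after delta reflects only the new load.
    gc.collect()
    torch.cuda.empty_cache()
    initial_memory = torch.cuda.memory_allocated()
    obj = load_fn()  # e.g. lambda: MyModel().to("cuda")
    memory_used = torch.cuda.memory_allocated() - initial_memory
    print(f"GPU memory used: {memory_used / 1024**2:.1f} MiB")
    return obj

Note that because load_image_model called enable_model_cpu_offload(), most weights sit in host RAM until they are needed, so the allocated-memory delta can understate the checkpoint's real size; the parameter-byte sum is the steadier figure.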
 
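Per the commit title, the image model had become gated on the Hub, and the loader path was disabled rather than removed. For anyone re-enabling it, a gated checkpoint can usually be loaded by authenticating instead of commenting the code out; a minimal sketch, assuming the account has accepted the model's license terms and exports a token as HF_TOKEN (diffusers forwards the token kwarg to huggingface_hub for the download):

import os
import torch
from diffusers import FluxPipeline

# Assumes access to the gated repository has already been granted
# to the account that owns this token.
flux_pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
    token=os.environ.get("HF_TOKEN"),
)
flux_pipe.enable_model_cpu_offload()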