mohanz committed
Commit d898f74 · Parent(s): b6beebe

adjust restart to be 1 day
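The substantive change in this commit is the restart schedule: `restart_space` was previously queued every 21600 seconds (6 hours) and now runs every 86400 seconds (24 × 60 × 60, i.e. one day); the rest of the diff is mechanical reformatting of app.py. A minimal sketch of the new wiring, isolated from the app, assuming the same apscheduler and huggingface_hub dependencies app.py already imports:

```python
# Sketch of the scheduling change this commit makes, extracted from the diff below.
# Assumes HF_TOKEN is set in the environment, as app.py already expects.
import os

from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi


HF_TOKEN = os.environ.get("HF_TOKEN")


def restart_space():
    # factory_reboot=True rebuilds the Space from scratch rather than
    # simply restarting the running container.
    HfApi().restart_space(
        repo_id="Antigma/quantize-my-repo", token=HF_TOKEN, factory_reboot=True
    )


scheduler = BackgroundScheduler()
# Previously seconds=21600 (6 hours); now 86400 = 24 * 60 * 60, i.e. one day.
scheduler.add_job(restart_space, "interval", seconds=86400)
scheduler.start()
```

With an `"interval"` trigger the first run fires one full interval after start-up, so the Space now restarts roughly once per day from launch instead of four times a day.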

Files changed (1)
  1. app.py +331 -163
app.py CHANGED
@@ -12,6 +12,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from datetime import datetime
 import numpy as np
 import shutil
+
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
@@ -25,21 +26,27 @@ os.makedirs(log_dir, exist_ok=True)
 logging.basicConfig(
     filename=os.path.join(log_dir, "app.log"),
     level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s"
+    format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
 logger = logging.getLogger(__name__)
 
-def get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id = None,):
+
+def get_llama_cpp_notes(
+    gguf_files,
+    new_repo_url,
+    split_model,
+    model_id=None,
+):
     try:
         result = subprocess.run(
-            ['git', '-C', './llama.cpp', 'describe', '--tags', '--always'],
+            ["git", "-C", "./llama.cpp", "describe", "--tags", "--always"],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             check=True,
-            text=True
+            text=True,
         )
-        version = result.stdout.strip().split('-')[0]
+        version = result.stdout.strip().split("-")[0]
         text = f"""
 *Produced by [Antigma Labs](https://antigma.ai)*
 ## llama.cpp quantization
@@ -87,32 +94,51 @@ You can either specify a new local-dir (deepseek-ai_DeepSeek-V3-0324-Q8_0) or do
 
 
 def get_repo_namespace(repo_owner, username, user_orgs):
-    if repo_owner == 'self':
+    if repo_owner == "self":
         return username
     for org in user_orgs:
-        if org['name'] == repo_owner:
-            return org['name']
+        if org["name"] == repo_owner:
+            return org["name"]
     raise ValueError(f"Invalid repo_owner: {repo_owner}")
 
+
 def escape(s: str) -> str:
-    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;").replace("\n", "<br/>")
+    return (
+        s.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace('"', "&quot;")
+        .replace("\n", "<br/>")
+    )
+
 
 def toggle_repo_owner(export_to_org, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error("You must be logged in to use GGUF-my-repo")
     if not export_to_org:
-        return gr.update(visible=False, choices=["self"], value="self"), gr.update(visible=False, value="")
+        return gr.update(visible=False, choices=["self"], value="self"), gr.update(
+            visible=False, value=""
+        )
     info = whoami(oauth_token.token)
     orgs = [org["name"] for org in info.get("orgs", [])]
-    return gr.update(visible=True, choices=["self"] + orgs, value="self"), gr.update(visible=True)
+    return gr.update(visible=True, choices=["self"] + orgs, value="self"), gr.update(
+        visible=True
+    )
+
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     imatrix_command = [
         "./llama.cpp/llama-imatrix",
-        "-m", model_path,
-        "-f", train_data_path,
-        "-ngl", "99",
-        "--output-frequency", "10",
-        "-o", output_path,
+        "-m",
+        model_path,
+        "-f",
+        train_data_path,
+        "-ngl",
+        "99",
+        "--output-frequency",
+        "10",
+        "-o",
+        output_path,
     ]
 
     if not os.path.isfile(model_path):
@@ -124,7 +150,9 @@ def generate_importance_matrix(model_path: str, train_data_path: str, output_pat
     try:
         process.wait(timeout=60)  # added wait
     except subprocess.TimeoutExpired:
-        print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
+        print(
+            "Imatrix computation timed out. Sending SIGINT to allow graceful termination..."
+        )
         process.send_signal(signal.SIGINT)
         try:
             process.wait(timeout=5)  # grace period
@@ -134,7 +162,17 @@ def generate_importance_matrix(model_path: str, train_data_path: str, output_pat
 
     print("Importance matrix generation completed.")
 
-def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token: gr.OAuthToken | None, split_max_tensors=256, split_max_size=None, org_token=None, export_to_org=False):
+
+def split_upload_model(
+    model_path: str,
+    outdir: str,
+    repo_id: str,
+    oauth_token: gr.OAuthToken | None,
+    split_max_tensors=256,
+    split_max_size=None,
+    org_token=None,
+    export_to_org=False,
+):
     print(f"Model path: {model_path}")
     print(f"Output dir: {outdir}")
 
@@ -153,7 +191,9 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
         split_cmd.append(str(split_max_tensors))
 
     # args for output
-    model_path_prefix = '.'.join(model_path.split('.')[:-1]) # remove the file extension
+    model_path_prefix = ".".join(
+        model_path.split(".")[:-1]
+    )  # remove the file extension
     split_cmd.append(model_path)
     split_cmd.append(model_path_prefix)
 
@@ -172,15 +212,19 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     if os.path.exists(model_path):
         os.remove(model_path)
 
-    model_file_prefix = model_path_prefix.split('/')[-1]
+    model_file_prefix = model_path_prefix.split("/")[-1]
     print(f"Model file name prefix: {model_file_prefix}")
-    sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
+    sharded_model_files = [
+        f
+        for f in os.listdir(outdir)
+        if f.startswith(model_file_prefix) and f.endswith(".gguf")
+    ]
     if sharded_model_files:
         print(f"Sharded model files: {sharded_model_files}")
-        if export_to_org and org_token!="":
-            api = HfApi(token = org_token)
+        if export_to_org and org_token != "":
+            api = HfApi(token=org_token)
         else:
-            api = HfApi(token=oauth_token.token)
+            api = HfApi(token=oauth_token.token)
         for file in sharded_model_files:
             file_path = os.path.join(outdir, file)
             print(f"Uploading file: {file_path}")
@@ -197,121 +241,236 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
 
     print("Sharded model has been uploaded successfully!")
 
-def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo,
-                  train_data_file, split_model, split_max_tensors, split_max_size,
-                  export_to_org, repo_owner, org_token, oauth_token: gr.OAuthToken | None):
+
+def process_model(
+    model_id,
+    q_method,
+    use_imatrix,
+    imatrix_q_method,
+    private_repo,
+    train_data_file,
+    split_model,
+    split_max_tensors,
+    split_max_size,
+    export_to_org,
+    repo_owner,
+    org_token,
+    oauth_token: gr.OAuthToken | None,
+):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error("You must be logged in to use GGUF-my-repo")
     try:
         whoami(oauth_token.token)
     except Exception as e:
         raise gr.Error("You must be logged in to use GGUF-my-repo")
-
+
     user_info = whoami(oauth_token.token)
     username = user_info["name"]
     user_orgs = user_info.get("orgs", [])
     if not export_to_org:
         repo_owner = "self"
 
-
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    logger.info(f"Time {current_time}, Username {username}, Model_ID, {model_id}, q_method {','.join(q_method)}")
+    logger.info(
+        f"Time {current_time}, Username {username}, Model_ID, {model_id}, q_method {','.join(q_method)}"
+    )
 
     repo_namespace = get_repo_namespace(repo_owner, username, user_orgs)
-    model_name = model_id.split('/')[-1]
+    model_name = model_id.split("/")[-1]
     try:
-        api_token = org_token if (export_to_org and org_token!="") else oauth_token.token
-        api = HfApi(token=api_token)
-
-        dl_pattern = ["*.md", "*.json", "*.model"]
-        pattern = "*.safetensors" if any(
-            f.path.endswith(".safetensors")
-            for f in api.list_repo_tree(repo_id=model_id, recursive=True)
-        ) else "*.bin"
-        dl_pattern += [pattern]
-
-        os.makedirs(downloads_dir, exist_ok=True)
-        os.makedirs(outputs_dir, exist_ok=True)
-
-        with tempfile.TemporaryDirectory(dir=outputs_dir) as outdir:
-            fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
-
-            with tempfile.TemporaryDirectory(dir=downloads_dir) as tmpdir:
-                print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Start download")
-                logger.info(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Start download")
-                local_dir = Path(tmpdir)/model_name
-                api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
-
-                config_dir = local_dir/"config.json"
-                adapter_config_dir = local_dir/"adapter_config.json"
-                if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
-                    raise Exception("adapter_config.json is present. If converting LoRA, use GGUF-my-lora.")
-                print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Download successfully")
-                logger.info(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Download successfully")
-
-                result = subprocess.run(["python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16], shell=False, capture_output=True)
-                print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Converted to f16")
-                logger.info(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Converted to f16")
-
-                if result.returncode != 0:
-                    raise Exception(f"Error converting to fp16: {result.stderr.decode()}")
-                shutil.rmtree(downloads_dir)
-
-            imatrix_path = Path(outdir)/"imatrix.dat"
-            if use_imatrix:
-                train_data_path = train_data_file.name if train_data_file else "llama.cpp/groups_merged.txt"
-                if not os.path.isfile(train_data_path):
-                    raise Exception(f"Training data not found: {train_data_path}")
-                generate_importance_matrix(fp16, train_data_path, imatrix_path)
-
-            quant_methods = [imatrix_q_method] if use_imatrix else (q_method if isinstance(q_method, list) else [q_method])
-            suffix = "imat" if use_imatrix else None
-
-            gguf_files = []
-            for method in quant_methods:
-                print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Begin quantize")
-                logger.info(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Begin quantize")
-
-                name = f"{model_name.lower()}-{method.lower()}-{suffix}.gguf" if suffix else f"{model_name.lower()}-{method.lower()}.gguf"
-                path = str(Path(outdir)/name)
-                quant_cmd = ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path, fp16, path, method] if use_imatrix else ["./llama.cpp/llama-quantize", fp16, path, method]
-                result = subprocess.run(quant_cmd, shell=False, capture_output=True)
-                if result.returncode != 0:
-                    raise Exception(f"Quantization failed ({method}): {result.stderr.decode()}")
-                size = os.path.getsize(path)/1024/1024/1024
-                gguf_files.append((name, path, size, method))
-
-            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Quantize successfully!")
-            logger.info(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Quantize successfully!")
-
-            suffix_for_repo = f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
-            repo_id = f"{repo_namespace}/{model_name}-GGUF"
-            new_repo_url = api.create_repo(repo_id=repo_id, exist_ok=True, private=private_repo)
-
-            try:
-                card = ModelCard.load(model_id, token=oauth_token.token)
-            except:
-                card = ModelCard("")
-            card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
-            card.data.base_model = model_id
-            card.text = dedent(get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id))
-            readme_path = Path(outdir)/"README.md"
-            card.save(readme_path)
-            for name, path, _, _ in gguf_files:
-                if split_model:
-                    split_upload_model(path, outdir, repo_id, oauth_token, split_max_tensors, split_max_size, org_token, export_to_org)
-                else:
-                    api.upload_file(path_or_fileobj=path, path_in_repo=name, repo_id=repo_id)
-            if use_imatrix and os.path.isfile(imatrix_path):
-                api.upload_file(path_or_fileobj=imatrix_path, path_in_repo="imatrix.dat", repo_id=repo_id)
-            api.upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id)
-
-            return (f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>', f"llama{np.random.randint(9)}.png")
+        api_token = (
+            org_token if (export_to_org and org_token != "") else oauth_token.token
+        )
+        api = HfApi(token=api_token)
+
+        dl_pattern = ["*.md", "*.json", "*.model"]
+        pattern = (
+            "*.safetensors"
+            if any(
+                f.path.endswith(".safetensors")
+                for f in api.list_repo_tree(repo_id=model_id, recursive=True)
+            )
+            else "*.bin"
+        )
+        dl_pattern += [pattern]
+
+        os.makedirs(downloads_dir, exist_ok=True)
+        os.makedirs(outputs_dir, exist_ok=True)
+
+        with tempfile.TemporaryDirectory(dir=outputs_dir) as outdir:
+            fp16 = str(Path(outdir) / f"{model_name}.fp16.gguf")
+
+            with tempfile.TemporaryDirectory(dir=downloads_dir) as tmpdir:
+                print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Start download")
+                logger.info(
+                    datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Start download"
+                )
+                local_dir = Path(tmpdir) / model_name
+                api.snapshot_download(
+                    repo_id=model_id,
+                    local_dir=local_dir,
+                    local_dir_use_symlinks=False,
+                    allow_patterns=dl_pattern,
+                )
+
+                config_dir = local_dir / "config.json"
+                adapter_config_dir = local_dir / "adapter_config.json"
+                if os.path.exists(adapter_config_dir) and not os.path.exists(
+                    config_dir
+                ):
+                    raise Exception(
+                        "adapter_config.json is present. If converting LoRA, use GGUF-my-lora."
+                    )
+                print(
+                    datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    + " Download successfully"
+                )
+                logger.info(
+                    datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    + " Download successfully"
+                )
+
+                result = subprocess.run(
+                    [
+                        "python",
+                        CONVERSION_SCRIPT,
+                        local_dir,
+                        "--outtype",
+                        "f16",
+                        "--outfile",
+                        fp16,
+                    ],
+                    shell=False,
+                    capture_output=True,
+                )
+                print(
+                    datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Converted to f16"
+                )
+                logger.info(
+                    datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Converted to f16"
+                )
+
+                if result.returncode != 0:
+                    raise Exception(
+                        f"Error converting to fp16: {result.stderr.decode()}"
+                    )
+                shutil.rmtree(downloads_dir)
+
+            imatrix_path = Path(outdir) / "imatrix.dat"
+            if use_imatrix:
+                train_data_path = (
+                    train_data_file.name
+                    if train_data_file
+                    else "llama.cpp/groups_merged.txt"
+                )
+                if not os.path.isfile(train_data_path):
+                    raise Exception(f"Training data not found: {train_data_path}")
+                generate_importance_matrix(fp16, train_data_path, imatrix_path)
+
+            quant_methods = (
+                [imatrix_q_method]
+                if use_imatrix
+                else (q_method if isinstance(q_method, list) else [q_method])
+            )
+            suffix = "imat" if use_imatrix else None
+
+            gguf_files = []
+            for method in quant_methods:
+                print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Begin quantize")
+                logger.info(
+                    datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Begin quantize"
+                )
+
+                name = (
+                    f"{model_name.lower()}-{method.lower()}-{suffix}.gguf"
+                    if suffix
+                    else f"{model_name.lower()}-{method.lower()}.gguf"
+                )
+                path = str(Path(outdir) / name)
+                quant_cmd = (
+                    [
+                        "./llama.cpp/llama-quantize",
+                        "--imatrix",
+                        imatrix_path,
+                        fp16,
+                        path,
+                        method,
+                    ]
+                    if use_imatrix
+                    else ["./llama.cpp/llama-quantize", fp16, path, method]
+                )
+                result = subprocess.run(quant_cmd, shell=False, capture_output=True)
+                if result.returncode != 0:
+                    raise Exception(
+                        f"Quantization failed ({method}): {result.stderr.decode()}"
+                    )
+                size = os.path.getsize(path) / 1024 / 1024 / 1024
+                gguf_files.append((name, path, size, method))
+
+            print(
+                datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Quantize successfully!"
+            )
+            logger.info(
+                datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Quantize successfully!"
+            )
+
+            suffix_for_repo = (
+                f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
+            )
+            repo_id = f"{repo_namespace}/{model_name}-GGUF"
+            new_repo_url = api.create_repo(
+                repo_id=repo_id, exist_ok=True, private=private_repo
+            )
+
+            try:
+                card = ModelCard.load(model_id, token=oauth_token.token)
+            except:
+                card = ModelCard("")
+            card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
+            card.data.base_model = model_id
+            card.text = dedent(
+                get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id)
+            )
+            readme_path = Path(outdir) / "README.md"
+            card.save(readme_path)
+            for name, path, _, _ in gguf_files:
+                if split_model:
+                    split_upload_model(
+                        path,
+                        outdir,
+                        repo_id,
+                        oauth_token,
+                        split_max_tensors,
+                        split_max_size,
+                        org_token,
+                        export_to_org,
+                    )
+                else:
+                    api.upload_file(
+                        path_or_fileobj=path, path_in_repo=name, repo_id=repo_id
+                    )
+            if use_imatrix and os.path.isfile(imatrix_path):
+                api.upload_file(
+                    path_or_fileobj=imatrix_path,
+                    path_in_repo="imatrix.dat",
+                    repo_id=repo_id,
+                )
+            api.upload_file(
+                path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id
+            )
+
+            return (
+                f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>',
+                f"llama{np.random.randint(9)}.png",
+            )
     except Exception as e:
-        return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")
+        return (
+            f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>',
+            "error.png",
+        )
 
 
-css="""/* Custom CSS to allow scrolling */
+css = """/* Custom CSS to allow scrolling */
 .gradio-container {overflow-y: auto;}
 """
 model_id = HuggingfaceHubSearch(
@@ -323,30 +482,36 @@ model_id = HuggingfaceHubSearch(
 export_to_org = gr.Checkbox(
     label="Export to Organization Repository",
     value=False,
-    info="If checked, you can select an organization to export to."
+    info="If checked, you can select an organization to export to.",
 )
 
 repo_owner = gr.Dropdown(
-    choices=["self"],
-    value="self",
-    label="Repository Owner",
-    visible=False
+    choices=["self"], value="self", label="Repository Owner", visible=False
 )
 
-org_token = gr.Textbox(
-    label="Org Access Token",
-    type="password",
-    visible=False
-)
+org_token = gr.Textbox(label="Org Access Token", type="password", visible=False)
 
 q_method = gr.Dropdown(
-    ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
+    [
+        "Q2_K",
+        "Q3_K_S",
+        "Q3_K_M",
+        "Q3_K_L",
+        "Q4_0",
+        "Q4_K_S",
+        "Q4_K_M",
+        "Q5_0",
+        "Q5_K_S",
+        "Q5_K_M",
+        "Q6_K",
+        "Q8_0",
+    ],
     label="Quantization Method",
     info="GGML quantization type",
     value="Q4_K_M",
     filterable=False,
     visible=True,
-    multiselect=True
+    multiselect=True,
 )
 
 imatrix_q_method = gr.Dropdown(
@@ -355,44 +520,36 @@ imatrix_q_method = gr.Dropdown(
     info="GGML imatrix quants type",
     value="IQ4_NL",
     filterable=False,
-    visible=False
+    visible=False,
 )
 
 use_imatrix = gr.Checkbox(
     value=False,
     label="Use Imatrix Quantization",
-    info="Use importance matrix for quantization."
+    info="Use importance matrix for quantization.",
 )
 
 private_repo = gr.Checkbox(
-    value=False,
-    label="Private Repo",
-    info="Create a private repo under your username."
+    value=False, label="Private Repo", info="Create a private repo under your username."
 )
 
-train_data_file = gr.File(
-    label="Training Data File",
-    file_types=["txt"],
-    visible=False
-)
+train_data_file = gr.File(label="Training Data File", file_types=["txt"], visible=False)
 
 split_model = gr.Checkbox(
-    value=False,
-    label="Split Model",
-    info="Shard the model using gguf-split."
+    value=False, label="Split Model", info="Shard the model using gguf-split."
 )
 
 split_max_tensors = gr.Number(
     value=256,
     label="Max Tensors per File",
     info="Maximum number of tensors per file when splitting model.",
-    visible=False
+    visible=False,
 )
 
 split_max_size = gr.Textbox(
     label="Max File Size",
     info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default. Accepted suffixes: M, G. Example: 256M, 5G",
-    visible=False
+    visible=False,
 )
 
 iface = gr.Interface(
@@ -409,36 +566,47 @@ iface = gr.Interface(
         split_max_size,
         export_to_org,
         repo_owner,
-        org_token
-    ],
-    outputs=[
-        gr.Markdown(label="Output"),
-        gr.Image(show_label=False)
+        org_token,
     ],
+    outputs=[gr.Markdown(label="Output"), gr.Image(show_label=False)],
     title="Make your own GGUF Quants — faster than ever before, believe me.",
     description="We take your Hugging Face repo — a terrific repo — we quantize it, we package it beautifully, and we give you your very own repo. It's smart. It's efficient. It's huge. You're gonna love it.",
-    api_name=False
+    api_name=False,
 )
 with gr.Blocks(css=".gradio-container {overflow-y: auto;}") as demo:
     gr.Markdown("Logged in, you must be. Classy, secure, and victorious, it keeps us.")
     gr.LoginButton(min_width=250)
 
-
-
-    export_to_org.change(fn=toggle_repo_owner, inputs=[export_to_org], outputs=[repo_owner, org_token])
-
-    split_model.change(fn=lambda sm: (gr.update(visible=sm), gr.update(visible=sm)), inputs=split_model, outputs=[split_max_tensors, split_max_size])
-    use_imatrix.change(fn=lambda use: (gr.update(visible=not use), gr.update(visible=use), gr.update(visible=use)), inputs=use_imatrix, outputs=[q_method, imatrix_q_method, train_data_file])
+    export_to_org.change(
+        fn=toggle_repo_owner, inputs=[export_to_org], outputs=[repo_owner, org_token]
+    )
+
+    split_model.change(
+        fn=lambda sm: (gr.update(visible=sm), gr.update(visible=sm)),
+        inputs=split_model,
+        outputs=[split_max_tensors, split_max_size],
+    )
+    use_imatrix.change(
+        fn=lambda use: (
+            gr.update(visible=not use),
+            gr.update(visible=use),
+            gr.update(visible=use),
+        ),
+        inputs=use_imatrix,
+        outputs=[q_method, imatrix_q_method, train_data_file],
+    )
 
     iface.render()
 
 
-
 def restart_space():
-    HfApi().restart_space(repo_id="Antigma/quantize-my-repo", token=HF_TOKEN, factory_reboot=True)
+    HfApi().restart_space(
+        repo_id="Antigma/quantize-my-repo", token=HF_TOKEN, factory_reboot=True
+    )
+
 
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=21600)
+scheduler.add_job(restart_space, "interval", seconds=86400)
 scheduler.start()
 
-demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
+demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)