Brianpuz committed on
Commit 8a60066 · 1 Parent(s): 087286c

Save the model to the data directory
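
For context on the commit title: model downloads and quantization outputs now live under the Space's persistent /data volume. Below is a minimal sketch of that layout (not part of app.py; it assumes a writable /data volume, as on a Space with persistent storage enabled):

```python
# Sketch only: mirrors the /data paths used in the diff below.
import os
import tempfile

downloads_dir = "/data/downloads"
outputs_dir = "/data/outputs"

# exist_ok=True lets the app recreate the layout safely after a restart.
os.makedirs(downloads_dir, exist_ok=True)
os.makedirs(outputs_dir, exist_ok=True)

# Scratch directories stay temporary, but they are rooted inside the
# persistent volume, so multi-GB model files never hit the ephemeral disk.
with tempfile.TemporaryDirectory(dir=downloads_dir) as tmpdir:
    print(f"download the model snapshot into {tmpdir}")
```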

Files changed (1)
  1. app.py +147 -309
app.py CHANGED. The diff viewer's two columns are shown in sequence below: first the old side of each hunk (removed lines prefixed with "-"), then the new side (added lines prefixed with "+").
@@ -15,45 +15,29 @@ import numpy as np
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
  CONVERSION_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"

- # Get Hugging Face token from environment variable
- HF_TOKEN = os.environ.get("HF_TOKEN")
-
- # Set up persistent storage paths
  log_dir = "/data/logs"
  downloads_dir = "/data/downloads"
  outputs_dir = "/data/outputs"
- models_dir = "/data/models"
-
- # Create directories if they don't exist
  os.makedirs(log_dir, exist_ok=True)
- os.makedirs(downloads_dir, exist_ok=True)
- os.makedirs(outputs_dir, exist_ok=True)
- os.makedirs(models_dir, exist_ok=True)

  logging.basicConfig(
  filename=os.path.join(log_dir, "app.log"),
  level=logging.INFO,
- format="%(asctime)s - %(levelname)s - %(message)s",
  )

  logger = logging.getLogger(__name__)

-
- def get_llama_cpp_notes(
- gguf_files,
- new_repo_url,
- split_model,
- model_id=None,
- ):
  try:
  result = subprocess.run(
- ["git", "-C", "./llama.cpp", "describe", "--tags", "--always"],
  stdout=subprocess.PIPE,
  stderr=subprocess.PIPE,
  check=True,
- text=True,
  )
- version = result.stdout.strip().split("-")[0]
  text = f"""
  *Produced by [Antigma Labs](https://antigma.ai)*
  ## llama.cpp quantization
@@ -62,8 +46,7 @@ Original model: https://huggingface.co/{model_id}
  Run them directly with [llama.cpp](https://github.com/ggml-org/llama.cpp), or any other llama.cpp based project
  ## Prompt format
  ```
- {{system_prompt}}
- {{prompt}}
  ```
  ## Download a file (not the whole branch) from below:
  | Filename | Quant type | File Size | Split |
@@ -95,51 +78,32 @@ You can either specify a new local-dir (deepseek-ai_DeepSeek-V3-0324-Q8_0) or do


  def get_repo_namespace(repo_owner, username, user_orgs):
- if repo_owner == "self":
  return username
  for org in user_orgs:
- if org["name"] == repo_owner:
- return org["name"]
  raise ValueError(f"Invalid repo_owner: {repo_owner}")

-
  def escape(s: str) -> str:
- return (
- s.replace("&", "&amp;")
- .replace("<", "&lt;")
- .replace(">", "&gt;")
- .replace('"', "&quot;")
- .replace("\n", "<br/>")
- )
-

  def toggle_repo_owner(export_to_org, oauth_token: gr.OAuthToken | None):
  if oauth_token is None or oauth_token.token is None:
  raise gr.Error("You must be logged in to use GGUF-my-repo")
  if not export_to_org:
- return gr.update(visible=False, choices=["self"], value="self"), gr.update(
- visible=False, value=""
- )
  info = whoami(oauth_token.token)
  orgs = [org["name"] for org in info.get("orgs", [])]
- return gr.update(visible=True, choices=["self"] + orgs, value="self"), gr.update(
- visible=True
- )
-
-
  def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
  imatrix_command = [
  "./llama.cpp/llama-imatrix",
- "-m",
- model_path,
- "-f",
- train_data_path,
- "-ngl",
- "99",
- "--output-frequency",
- "10",
- "-o",
- output_path,
  ]

  if not os.path.isfile(model_path):
@@ -151,9 +115,7 @@ def generate_importance_matrix(model_path: str, train_data_path: str, output_pat
  try:
  process.wait(timeout=60) # added wait
  except subprocess.TimeoutExpired:
- print(
- "Imatrix computation timed out. Sending SIGINT to allow graceful termination..."
- )
  process.send_signal(signal.SIGINT)
  try:
  process.wait(timeout=5) # grace period
@@ -163,17 +125,7 @@ def generate_importance_matrix(model_path: str, train_data_path: str, output_pat

  print("Importance matrix generation completed.")

-
- def split_upload_model(
- model_path: str,
- outdir: str,
- repo_id: str,
- oauth_token: gr.OAuthToken | None,
- split_max_tensors=256,
- split_max_size=None,
- org_token=None,
- export_to_org=False,
- ):
  print(f"Model path: {model_path}")
  print(f"Output dir: {outdir}")

@@ -192,9 +144,7 @@ def split_upload_model(
  split_cmd.append(str(split_max_tensors))

  # args for output
- model_path_prefix = ".".join(
- model_path.split(".")[:-1]
- ) # remove the file extension
  split_cmd.append(model_path)
  split_cmd.append(model_path_prefix)

@@ -213,19 +163,15 @@ def split_upload_model(
  if os.path.exists(model_path):
  os.remove(model_path)

- model_file_prefix = model_path_prefix.split("/")[-1]
  print(f"Model file name prefix: {model_file_prefix}")
- sharded_model_files = [
- f
- for f in os.listdir(outdir)
- if f.startswith(model_file_prefix) and f.endswith(".gguf")
- ]
  if sharded_model_files:
  print(f"Sharded model files: {sharded_model_files}")
- if export_to_org and org_token != "":
- api = HfApi(token=org_token)
  else:
- api = HfApi(token=oauth_token.token)
  for file in sharded_model_files:
  file_path = os.path.join(outdir, file)
  print(f"Uploading file: {file_path}")
@@ -242,22 +188,9 @@ def split_upload_model(

  print("Sharded model has been uploaded successfully!")

-
- def process_model(
- model_id,
- q_method,
- use_imatrix,
- imatrix_q_method,
- private_repo,
- train_data_file,
- split_model,
- split_max_tensors,
- split_max_size,
- export_to_org,
- repo_owner,
- org_token,
- oauth_token: gr.OAuthToken | None,
- ):
  if oauth_token is None or oauth_token.token is None:
  raise gr.Error("You must be logged in to use GGUF-my-repo")
@@ -267,176 +200,91 @@ def process_model(
  if not export_to_org:
  repo_owner = "self"

  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- logger.info(
- f"Time {current_time}, Username {username}, Model_ID, {model_id}, q_method {','.join(q_method)}"
- )

  repo_namespace = get_repo_namespace(repo_owner, username, user_orgs)
- model_name = model_id.split("/")[-1]
  try:
- api_token = (
- org_token if (export_to_org and org_token != "") else oauth_token.token
- )
- api = HfApi(token=api_token)
-
- dl_pattern = ["*.md", "*.json", "*.model"]
- pattern = (
- "*.safetensors"
- if any(
- f.path.endswith(".safetensors")
- for f in api.list_repo_tree(repo_id=model_id, recursive=True)
- )
- else "*.bin"
- )
- dl_pattern += [pattern]
-
- # Use persistent directories
- model_download_dir = os.path.join(downloads_dir, model_name)
- os.makedirs(model_download_dir, exist_ok=True)
-
- with tempfile.TemporaryDirectory(dir=outputs_dir) as outdir:
- fp16 = str(Path(outdir) / f"{model_name}.fp16.gguf")
-
- with tempfile.TemporaryDirectory(dir=model_download_dir) as tmpdir:
- local_dir = Path(tmpdir) / model_name
- api.snapshot_download(
- repo_id=model_id,
- local_dir=local_dir,
- local_dir_use_symlinks=False,
- allow_patterns=dl_pattern,
- )
-
- config_dir = local_dir / "config.json"
- adapter_config_dir = local_dir / "adapter_config.json"
- if os.path.exists(adapter_config_dir) and not os.path.exists(
- config_dir
- ):
- raise Exception(
- "adapter_config.json is present. If converting LoRA, use GGUF-my-lora."
- )
-
- result = subprocess.run(
- [
- "python",
- CONVERSION_SCRIPT,
- local_dir,
- "--outtype",
- "f16",
- "--outfile",
- fp16,
- ],
- shell=False,
- capture_output=True,
- )
- if result.returncode != 0:
- raise Exception(
- f"Error converting to fp16: {result.stderr.decode()}"
- )
-
- imatrix_path = Path(outdir) / "imatrix.dat"
- if use_imatrix:
- train_data_path = (
- train_data_file.name
- if train_data_file
- else "llama.cpp/groups_merged.txt"
- )
- if not os.path.isfile(train_data_path):
- raise Exception(f"Training data not found: {train_data_path}")
- generate_importance_matrix(fp16, train_data_path, imatrix_path)
-
- quant_methods = (
- [imatrix_q_method]
- if use_imatrix
- else (q_method if isinstance(q_method, list) else [q_method])
- )
- suffix = "imat" if use_imatrix else None
-
- gguf_files = []
- for method in quant_methods:
- name = (
- f"{model_name.lower()}-{method.lower()}-{suffix}.gguf"
- if suffix
- else f"{model_name.lower()}-{method.lower()}.gguf"
- )
- path = str(Path(outdir) / name)
- quant_cmd = (
- [
- "./llama.cpp/llama-quantize",
- "--imatrix",
- imatrix_path,
- fp16,
- path,
- method,
- ]
- if use_imatrix
- else ["./llama.cpp/llama-quantize", fp16, path, method]
- )
- result = subprocess.run(quant_cmd, shell=False, capture_output=True)
- if result.returncode != 0:
- raise Exception(
- f"Quantization failed ({method}): {result.stderr.decode()}"
- )
- size = os.path.getsize(path) / 1024 / 1024 / 1024
- gguf_files.append((name, path, size, method))
-
- suffix_for_repo = (
- f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
- )
- repo_id = f"{repo_namespace}/{model_name}-{suffix_for_repo}-GGUF"
- new_repo_url = api.create_repo(
- repo_id=repo_id, exist_ok=True, private=private_repo
- )
-
- try:
- card = ModelCard.load(model_id, token=oauth_token.token)
- except:
- card = ModelCard("")
- card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
- card.data.base_model = model_id
- card.text = dedent(
- get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id)
- )
- readme_path = Path(outdir) / "README.md"
- card.save(readme_path)
- for name, path, _, _ in gguf_files:
- if split_model:
- split_upload_model(
- path,
- outdir,
- repo_id,
- oauth_token,
- split_max_tensors,
- split_max_size,
- org_token,
- export_to_org,
- )
- else:
- api.upload_file(
- path_or_fileobj=path, path_in_repo=name, repo_id=repo_id
- )
- if use_imatrix and os.path.isfile(imatrix_path):
- api.upload_file(
- path_or_fileobj=imatrix_path,
- path_in_repo="imatrix.dat",
- repo_id=repo_id,
- )
- api.upload_file(
- path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id
- )
-
- return (
- f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>',
- f"llama{np.random.randint(9)}.png",
- )
  except Exception as e:
- raise (
- f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>',
- "error.png",
- )


- css = """/* Custom CSS to allow scrolling */
  .gradio-container {overflow-y: auto;}
  """
  model_id = HuggingfaceHubSearch(
@@ -448,36 +296,30 @@ model_id = HuggingfaceHubSearch(
  export_to_org = gr.Checkbox(
  label="Export to Organization Repository",
  value=False,
- info="If checked, you can select an organization to export to.",
  )

  repo_owner = gr.Dropdown(
- choices=["self"], value="self", label="Repository Owner", visible=False
  )

- org_token = gr.Textbox(label="Org Access Token", type="password", visible=False)

  q_method = gr.Dropdown(
- [
- "Q2_K",
- "Q3_K_S",
- "Q3_K_M",
- "Q3_K_L",
- "Q4_0",
- "Q4_K_S",
- "Q4_K_M",
- "Q5_0",
- "Q5_K_S",
- "Q5_K_M",
- "Q6_K",
- "Q8_0",
- ],
  label="Quantization Method",
  info="GGML quantization type",
  value="Q4_K_M",
  filterable=False,
  visible=True,
- multiselect=True,
  )

  imatrix_q_method = gr.Dropdown(
@@ -486,36 +328,44 @@ imatrix_q_method = gr.Dropdown(
  info="GGML imatrix quants type",
  value="IQ4_NL",
  filterable=False,
- visible=False,
  )

  use_imatrix = gr.Checkbox(
  value=False,
  label="Use Imatrix Quantization",
- info="Use importance matrix for quantization.",
  )

  private_repo = gr.Checkbox(
- value=False, label="Private Repo", info="Create a private repo under your username."
  )

- train_data_file = gr.File(label="Training Data File", file_types=["txt"], visible=False)

  split_model = gr.Checkbox(
- value=False, label="Split Model", info="Shard the model using gguf-split."
  )

  split_max_tensors = gr.Number(
  value=256,
  label="Max Tensors per File",
  info="Maximum number of tensors per file when splitting model.",
- visible=False,
  )

  split_max_size = gr.Textbox(
  label="Max File Size",
  info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default. Accepted suffixes: M, G. Example: 256M, 5G",
- visible=False,
  )

  iface = gr.Interface(
@@ -532,47 +382,35 @@ iface = gr.Interface(
  split_max_size,
  export_to_org,
  repo_owner,
- org_token,
  ],
- outputs=[gr.Markdown(label="Output"), gr.Image(show_label=False)],
  title="Make your own GGUF Quants — faster than ever before, believe me.",
  description="We take your Hugging Face repo — a terrific repo — we quantize it, we package it beautifully, and we give you your very own repo. It's smart. It's efficient. It's huge. You're gonna love it.",
- api_name=False,
  )
  with gr.Blocks(css=".gradio-container {overflow-y: auto;}") as demo:
  gr.Markdown("Logged in, you must be. Classy, secure, and victorious, it keeps us.")
  gr.LoginButton(min_width=250)

- export_to_org.change(
- fn=toggle_repo_owner, inputs=[export_to_org], outputs=[repo_owner, org_token]
- )
-
- split_model.change(
- fn=lambda sm: (gr.update(visible=sm), gr.update(visible=sm)),
- inputs=split_model,
- outputs=[split_max_tensors, split_max_size],
- )
- use_imatrix.change(
- fn=lambda use: (
- gr.update(visible=not use),
- gr.update(visible=use),
- gr.update(visible=use),
- ),
- inputs=use_imatrix,
- outputs=[q_method, imatrix_q_method, train_data_file],
- )

  iface.render()


  def restart_space():
- HfApi().restart_space(
- repo_id="Antigma/quantize-my-repo", token=HF_TOKEN, factory_reboot=True
- )
-

  scheduler = BackgroundScheduler()
  scheduler.add_job(restart_space, "interval", seconds=21600)
  scheduler.start()

- demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
 
app.py after the change (the new side of the same hunks; added lines are prefixed with "+"):

  os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
  CONVERSION_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"

  log_dir = "/data/logs"
  downloads_dir = "/data/downloads"
  outputs_dir = "/data/outputs"
  os.makedirs(log_dir, exist_ok=True)

  logging.basicConfig(
  filename=os.path.join(log_dir, "app.log"),
  level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s"
  )

  logger = logging.getLogger(__name__)

+ def get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id = None,):
  try:
  result = subprocess.run(
+ ['git', '-C', './llama.cpp', 'describe', '--tags', '--always'],
  stdout=subprocess.PIPE,
  stderr=subprocess.PIPE,
  check=True,
+ text=True
  )
+ version = result.stdout.strip().split('-')[0]
  text = f"""
  *Produced by [Antigma Labs](https://antigma.ai)*
  ## llama.cpp quantization

  Run them directly with [llama.cpp](https://github.com/ggml-org/llama.cpp), or any other llama.cpp based project
  ## Prompt format
  ```
+ <|begin▁of▁sentence|>{{system_prompt}}<|User|>{{prompt}}<|Assistant|><|end▁of▁sentence|><|Assistant|>
  ```
  ## Download a file (not the whole branch) from below:
  | Filename | Quant type | File Size | Split |


  def get_repo_namespace(repo_owner, username, user_orgs):
+ if repo_owner == 'self':
  return username
  for org in user_orgs:
+ if org['name'] == repo_owner:
+ return org['name']
  raise ValueError(f"Invalid repo_owner: {repo_owner}")

  def escape(s: str) -> str:
+ return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;").replace("\n", "<br/>")

  def toggle_repo_owner(export_to_org, oauth_token: gr.OAuthToken | None):
  if oauth_token is None or oauth_token.token is None:
  raise gr.Error("You must be logged in to use GGUF-my-repo")
  if not export_to_org:
+ return gr.update(visible=False, choices=["self"], value="self"), gr.update(visible=False, value="")
  info = whoami(oauth_token.token)
  orgs = [org["name"] for org in info.get("orgs", [])]
+ return gr.update(visible=True, choices=["self"] + orgs, value="self"), gr.update(visible=True)

  def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
  imatrix_command = [
  "./llama.cpp/llama-imatrix",
+ "-m", model_path,
+ "-f", train_data_path,
+ "-ngl", "99",
+ "--output-frequency", "10",
+ "-o", output_path,
  ]

  if not os.path.isfile(model_path):

  try:
  process.wait(timeout=60) # added wait
  except subprocess.TimeoutExpired:
+ print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
  process.send_signal(signal.SIGINT)
  try:
  process.wait(timeout=5) # grace period
 

  print("Importance matrix generation completed.")

+ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token: gr.OAuthToken | None, split_max_tensors=256, split_max_size=None, org_token=None, export_to_org=False):
  print(f"Model path: {model_path}")
  print(f"Output dir: {outdir}")

  split_cmd.append(str(split_max_tensors))

  # args for output
+ model_path_prefix = '.'.join(model_path.split('.')[:-1]) # remove the file extension
  split_cmd.append(model_path)
  split_cmd.append(model_path_prefix)

  if os.path.exists(model_path):
  os.remove(model_path)

+ model_file_prefix = model_path_prefix.split('/')[-1]
  print(f"Model file name prefix: {model_file_prefix}")
+ sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
  if sharded_model_files:
  print(f"Sharded model files: {sharded_model_files}")
+ if export_to_org and org_token!="":
+ api = HfApi(token = org_token)
  else:
+ api = HfApi(token=oauth_token.token)
  for file in sharded_model_files:
  file_path = os.path.join(outdir, file)
  print(f"Uploading file: {file_path}")

  print("Sharded model has been uploaded successfully!")

+ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo,
+ train_data_file, split_model, split_max_tensors, split_max_size,
+ export_to_org, repo_owner, org_token, oauth_token: gr.OAuthToken | None):
  if oauth_token is None or oauth_token.token is None:
  raise gr.Error("You must be logged in to use GGUF-my-repo")

 
  if not export_to_org:
  repo_owner = "self"

+
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+ logger.info(f"Time {current_time}, Username {username}, Model_ID, {model_id}, q_method {','.join(q_method)}")

  repo_namespace = get_repo_namespace(repo_owner, username, user_orgs)
+ model_name = model_id.split('/')[-1]
  try:
+ api_token = org_token if (export_to_org and org_token!="") else oauth_token.token
+ api = HfApi(token=api_token)
+
+ dl_pattern = ["*.md", "*.json", "*.model"]
+ pattern = "*.safetensors" if any(
+ f.path.endswith(".safetensors")
+ for f in api.list_repo_tree(repo_id=model_id, recursive=True)
+ ) else "*.bin"
+ dl_pattern += [pattern]
+
+ os.makedirs(downloads_dir, exist_ok=True)
+ os.makedirs(outputs_dir, exist_ok=True)
+
+ with tempfile.TemporaryDirectory(dir=outputs_dir) as outdir:
+ fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
+
+ with tempfile.TemporaryDirectory(dir=downloads_dir) as tmpdir:
+ local_dir = Path(tmpdir)/model_name
+ api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+
+ config_dir = local_dir/"config.json"
+ adapter_config_dir = local_dir/"adapter_config.json"
+ if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+ raise Exception("adapter_config.json is present. If converting LoRA, use GGUF-my-lora.")
+
+ result = subprocess.run(["python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16], shell=False, capture_output=True)
+ if result.returncode != 0:
+ raise Exception(f"Error converting to fp16: {result.stderr.decode()}")
+
+ imatrix_path = Path(outdir)/"imatrix.dat"
+ if use_imatrix:
+ train_data_path = train_data_file.name if train_data_file else "llama.cpp/groups_merged.txt"
+ if not os.path.isfile(train_data_path):
+ raise Exception(f"Training data not found: {train_data_path}")
+ generate_importance_matrix(fp16, train_data_path, imatrix_path)
+
+ quant_methods = [imatrix_q_method] if use_imatrix else (q_method if isinstance(q_method, list) else [q_method])
+ suffix = "imat" if use_imatrix else None
+
+ gguf_files = []
+ for method in quant_methods:
+ name = f"{model_name.lower()}-{method.lower()}-{suffix}.gguf" if suffix else f"{model_name.lower()}-{method.lower()}.gguf"
+ path = str(Path(outdir)/name)
+ quant_cmd = ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path, fp16, path, method] if use_imatrix else ["./llama.cpp/llama-quantize", fp16, path, method]
+ result = subprocess.run(quant_cmd, shell=False, capture_output=True)
+ if result.returncode != 0:
+ raise Exception(f"Quantization failed ({method}): {result.stderr.decode()}")
+ size = os.path.getsize(path)/1024/1024/1024
+ gguf_files.append((name, path, size, method))
+
+ suffix_for_repo = f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
+ repo_id = f"{repo_namespace}/{model_name}-{suffix_for_repo}-GGUF"
+ new_repo_url = api.create_repo(repo_id=repo_id, exist_ok=True, private=private_repo)
+
+ try:
+ card = ModelCard.load(model_id, token=oauth_token.token)
+ except:
+ card = ModelCard("")
+ card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
+ card.data.base_model = model_id
+ card.text = dedent(get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id))
+ readme_path = Path(outdir)/"README.md"
+ card.save(readme_path)
+ for name, path, _, _ in gguf_files:
+ if split_model:
+ split_upload_model(path, outdir, repo_id, oauth_token, split_max_tensors, split_max_size, org_token, export_to_org)
+ else:
+ api.upload_file(path_or_fileobj=path, path_in_repo=name, repo_id=repo_id)
+ if use_imatrix and os.path.isfile(imatrix_path):
+ api.upload_file(path_or_fileobj=imatrix_path, path_in_repo="imatrix.dat", repo_id=repo_id)
+ api.upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id)
+
+ return (f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>', f"llama{np.random.randint(9)}.png")
  except Exception as e:
+ raise (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")


+ css="""/* Custom CSS to allow scrolling */
  .gradio-container {overflow-y: auto;}
  """
  model_id = HuggingfaceHubSearch(
 
  export_to_org = gr.Checkbox(
  label="Export to Organization Repository",
  value=False,
+ info="If checked, you can select an organization to export to."
  )

  repo_owner = gr.Dropdown(
+ choices=["self"],
+ value="self",
+ label="Repository Owner",
+ visible=False
  )

+ org_token = gr.Textbox(
+ label="Org Access Token",
+ type="password",
+ visible=False
+ )

  q_method = gr.Dropdown(
+ ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
  label="Quantization Method",
  info="GGML quantization type",
  value="Q4_K_M",
  filterable=False,
  visible=True,
+ multiselect=True
  )

  imatrix_q_method = gr.Dropdown(
 
  info="GGML imatrix quants type",
  value="IQ4_NL",
  filterable=False,
+ visible=False
  )

  use_imatrix = gr.Checkbox(
  value=False,
  label="Use Imatrix Quantization",
+ info="Use importance matrix for quantization."
  )

  private_repo = gr.Checkbox(
+ value=False,
+ label="Private Repo",
+ info="Create a private repo under your username."
  )

+ train_data_file = gr.File(
+ label="Training Data File",
+ file_types=["txt"],
+ visible=False
+ )

  split_model = gr.Checkbox(
+ value=False,
+ label="Split Model",
+ info="Shard the model using gguf-split."
  )

  split_max_tensors = gr.Number(
  value=256,
  label="Max Tensors per File",
  info="Maximum number of tensors per file when splitting model.",
+ visible=False
  )

  split_max_size = gr.Textbox(
  label="Max File Size",
  info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default. Accepted suffixes: M, G. Example: 256M, 5G",
+ visible=False
  )

  iface = gr.Interface(
 
  split_max_size,
  export_to_org,
  repo_owner,
+ org_token
+ ],
+ outputs=[
+ gr.Markdown(label="Output"),
+ gr.Image(show_label=False)
  ],
  title="Make your own GGUF Quants — faster than ever before, believe me.",
  description="We take your Hugging Face repo — a terrific repo — we quantize it, we package it beautifully, and we give you your very own repo. It's smart. It's efficient. It's huge. You're gonna love it.",
+ api_name=False
  )
  with gr.Blocks(css=".gradio-container {overflow-y: auto;}") as demo:
  gr.Markdown("Logged in, you must be. Classy, secure, and victorious, it keeps us.")
  gr.LoginButton(min_width=250)

+
+
+ export_to_org.change(fn=toggle_repo_owner, inputs=[export_to_org], outputs=[repo_owner, org_token])
+
+ split_model.change(fn=lambda sm: (gr.update(visible=sm), gr.update(visible=sm)), inputs=split_model, outputs=[split_max_tensors, split_max_size])
+ use_imatrix.change(fn=lambda use: (gr.update(visible=not use), gr.update(visible=use), gr.update(visible=use)), inputs=use_imatrix, outputs=[q_method, imatrix_q_method, train_data_file])

  iface.render()


  def restart_space():
+ HfApi().restart_space(repo_id="Antigma/quantize-my-repo", token=HF_TOKEN, factory_reboot=True)

  scheduler = BackgroundScheduler()
  scheduler.add_job(restart_space, "interval", seconds=21600)
  scheduler.start()

+ demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
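
The model card text embedded in the diff tells users to download a single file rather than the whole branch. As a usage sketch only (the repo id and filename below are placeholders, not outputs of this commit), one way to do that from Python with huggingface_hub:

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id and filename; substitute the quant repo and .gguf file
# produced by the Space for your model.
local_path = hf_hub_download(
    repo_id="your-namespace/your-model-Q4_K_M-GGUF",
    filename="your-model-q4_k_m.gguf",
    local_dir="./models",
)
print(local_path)
```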