mohanz committed on
Commit c8ce925 · 1 Parent(s): 0e2eafc

big refactor to include original model card
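In short, the refactor keeps the source repo's own README instead of discarding it: the new get_new_model_card helper takes the original card, adds the "antigma"/"quantize-my-repo" tags and the quantized-file table, and appends the untouched original text under an "## Original Model Card" heading. A minimal sketch of that idea using the huggingface_hub ModelCard API; the build_readme helper, model id, and notes string below are illustrative placeholders, not the Space's actual code:

```
from huggingface_hub import ModelCard

def build_readme(original_model_id: str, quant_notes: str, readme_path: str = "README.md") -> None:
    # Load the source repo's card if it has one; otherwise start from an empty card.
    try:
        card = ModelCard.load(original_model_id)
    except Exception:
        card = ModelCard("")
    # Tag the quantized repo and keep a pointer back to the base model.
    card.data.tags = (card.data.tags or []) + ["antigma", "quantize-my-repo"]
    card.data.base_model = original_model_id
    # Put the quantization notes first and keep the original card text below them.
    card.text = f"{quant_notes}\n\n## Original Model Card\n{card.text}"
    card.save(readme_path)

# Placeholder usage:
# build_readme("deepseek-ai/DeepSeek-V3-0324", "## llama.cpp quantization\n...")
```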

Files changed (1)
  1. app.py +164 -171
app.py CHANGED
@@ -3,7 +3,6 @@ import subprocess
3
  import signal
4
  import tempfile
5
  from pathlib import Path
6
- from textwrap import dedent
7
  import logging
8
  import gradio as gr
9
  from huggingface_hub import HfApi, ModelCard, whoami
@@ -32,12 +31,7 @@ logging.basicConfig(
32
  logger = logging.getLogger(__name__)
33
 
34
 
35
- def get_llama_cpp_notes(
36
- gguf_files,
37
- new_repo_url,
38
- split_model,
39
- model_id=None,
40
- ):
41
  try:
42
  result = subprocess.run(
43
  ["git", "-C", "./llama.cpp", "describe", "--tags", "--always"],
@@ -47,58 +41,13 @@ def get_llama_cpp_notes(
47
  text=True,
48
  )
49
  version = result.stdout.strip().split("-")[0]
50
- text = f"""
51
- *Produced by [Antigma Labs](https://antigma.ai), [Antigma Quantize Space](https://huggingface.co/spaces/Antigma/quantize-my-repo)*
52
-
53
- *Follow Antigma Labs in X [https://x.com/antigma_labs](https://x.com/antigma_labs)*
54
-
55
- *Antigma's GitHub Homepage [https://github.com/AntigmaLabs](https://github.com/AntigmaLabs)*
56
-
57
- ## llama.cpp quantization
58
- Using <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> release <a href="https://github.com/ggml-org/llama.cpp/releases/tag/{version}">{version}</a> for quantization.
59
- Original model: https://huggingface.co/{model_id}
60
- Run them directly with [llama.cpp](https://github.com/ggml-org/llama.cpp), or any other llama.cpp based project
61
- ## Prompt format
62
- ```
63
- <|begin▁of▁sentence|>{{system_prompt}}<|User|>{{prompt}}<|Assistant|><|end▁of▁sentence|><|Assistant|>
64
- ```
65
- ## Download a file (not the whole branch) from below:
66
- | Filename | Quant type | File Size | Split |
67
- | -------- | ---------- | --------- | ----- |
68
- | {'|'.join(['|'.join([gguf_files[i][0][:-5] if split_model else ('['+gguf_files[i][0]+']'+'(' + new_repo_url+'/blob/main/'+gguf_files[i][0] + ')'), gguf_files[i][3], f"{gguf_files[i][2]:.2f}" + ' GB', str(split_model),'''
69
- ''']) for i in range(len(gguf_files))]) }
70
- ## Downloading using huggingface-cli
71
- <details>
72
- <summary>Click to view download instructions</summary>
73
- First, make sure you have hugginface-cli installed:
74
-
75
- ```
76
- pip install -U "huggingface_hub[cli]"
77
- ```
78
-
79
- Then, you can target the specific file you want:
80
-
81
- ```
82
- huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}" --local-dir ./
83
- ```
84
-
85
- If the model is bigger than 50GB, it will have been split into multiple files. In order to download them all to a local folder, run:
86
-
87
- ```
88
- huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}/*" --local-dir ./
89
- ```
90
-
91
- You can either specify a new local-dir (deepseek-ai_DeepSeek-V3-0324-Q8_0) or download them all in place (./)
92
-
93
- </details>
94
- """
95
- return text
96
  except subprocess.CalledProcessError as e:
97
- print("Error:", e.stderr.strip())
98
  return None
99
 
100
 
101
- def get_repo_namespace(repo_owner, username, user_orgs):
102
  if repo_owner == "self":
103
  return username
104
  for org in user_orgs:
@@ -117,7 +66,7 @@ def escape(s: str) -> str:
117
  )
118
 
119
 
120
- def toggle_repo_owner(export_to_org, oauth_token: gr.OAuthToken | None):
121
  if oauth_token is None or oauth_token.token is None:
122
  raise gr.Error("You must be logged in to use quantize-my-repo")
123
  if not export_to_org:
@@ -131,7 +80,9 @@ def toggle_repo_owner(export_to_org, oauth_token: gr.OAuthToken | None):
131
  )
132
 
133
 
134
- def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
135
  imatrix_command = [
136
  "./llama.cpp/llama-imatrix",
137
  "-m",
@@ -147,25 +98,27 @@ def generate_importance_matrix(model_path: str, train_data_path: str, output_pat
147
  ]
148
 
149
  if not os.path.isfile(model_path):
150
- raise Exception(f"Model file not found: {model_path}")
151
 
152
- print("Running imatrix command...")
153
  process = subprocess.Popen(imatrix_command, shell=False)
154
 
155
  try:
156
- process.wait(timeout=60) # added wait
157
  except subprocess.TimeoutExpired:
158
- print(
159
  "Imatrix computation timed out. Sending SIGINT to allow graceful termination..."
160
  )
161
  process.send_signal(signal.SIGINT)
162
  try:
163
- process.wait(timeout=5) # grace period
164
  except subprocess.TimeoutExpired:
165
- print("Imatrix proc still didn't term. Forecfully terming process...")
166
  process.kill()
167
 
168
- print("Importance matrix generation completed.")
169
 
170
 
171
  def split_upload_model(
@@ -173,101 +126,160 @@ def split_upload_model(
173
  outdir: str,
174
  repo_id: str,
175
  oauth_token: gr.OAuthToken | None,
176
- split_max_tensors=256,
177
- split_max_size=None,
178
- org_token=None,
179
- export_to_org=False,
180
- ):
181
- print(f"Model path: {model_path}")
182
- print(f"Output dir: {outdir}")
183
 
184
  if oauth_token is None or oauth_token.token is None:
185
  raise ValueError("You have to be logged in.")
186
 
187
- split_cmd = [
188
- "./llama.cpp/llama-gguf-split",
189
- "--split",
190
- ]
191
  if split_max_size:
192
- split_cmd.append("--split-max-size")
193
- split_cmd.append(split_max_size)
194
  else:
195
- split_cmd.append("--split-max-tensors")
196
- split_cmd.append(str(split_max_tensors))
197
 
198
- # args for output
199
- model_path_prefix = ".".join(
200
- model_path.split(".")[:-1]
201
- ) # remove the file extension
202
- split_cmd.append(model_path)
203
- split_cmd.append(model_path_prefix)
204
 
205
- print(f"Split command: {split_cmd}")
206
 
207
  result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
208
- print(f"Split command stdout: {result.stdout}")
209
- print(f"Split command stderr: {result.stderr}")
210
 
211
  if result.returncode != 0:
212
- stderr_str = result.stderr.decode("utf-8")
213
- raise Exception(f"Error splitting the model: {stderr_str}")
214
- print("Model split successfully!")
215
 
216
- # remove the original model file if needed
217
  if os.path.exists(model_path):
218
  os.remove(model_path)
219
 
220
  model_file_prefix = model_path_prefix.split("/")[-1]
221
- print(f"Model file name prefix: {model_file_prefix}")
222
  sharded_model_files = [
223
  f
224
  for f in os.listdir(outdir)
225
  if f.startswith(model_file_prefix) and f.endswith(".gguf")
226
  ]
227
- if sharded_model_files:
228
- print(f"Sharded model files: {sharded_model_files}")
229
- if export_to_org and org_token != "":
230
- api = HfApi(token=org_token)
231
  else:
232
- api = HfApi(token=oauth_token.token)
233
- for file in sharded_model_files:
234
- file_path = os.path.join(outdir, file)
235
- print(f"Uploading file: {file_path}")
236
- try:
237
- api.upload_file(
238
- path_or_fileobj=file_path,
239
- path_in_repo=file,
240
- repo_id=repo_id,
241
- )
242
- except Exception as e:
243
- raise Exception(f"Error uploading file {file_path}: {e}")
244
- else:
245
- raise Exception("No sharded files found.")
246
 
247
- print("Sharded model has been uploaded successfully!")
248
 
249
 
250
  def process_model(
251
- model_id,
252
- q_method,
253
- use_imatrix,
254
- imatrix_q_method,
255
- private_repo,
256
- train_data_file,
257
- split_model,
258
- split_max_tensors,
259
- split_max_size,
260
- export_to_org,
261
- repo_owner,
262
- org_token,
263
  oauth_token: gr.OAuthToken | None,
264
- ):
265
  if oauth_token is None or oauth_token.token is None:
266
  raise gr.Error("You must be logged in to use quantize-my-repo")
267
  try:
268
  whoami(oauth_token.token)
269
  except Exception as e:
270
- raise gr.Error("You must be logged in to use quantize-my-repo")
271
 
272
  user_info = whoami(oauth_token.token)
273
  username = user_info["name"]
@@ -277,15 +289,17 @@ def process_model(
277
 
278
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
279
  logger.info(
280
- f"Time {current_time}, Username {username}, Model_ID, {model_id}, q_method {','.join(q_method)}"
281
  )
282
 
283
  repo_namespace = get_repo_namespace(repo_owner, username, user_orgs)
284
  model_name = model_id.split("/")[-1]
285
  try:
286
- api_token = (
287
- org_token if (export_to_org and org_token != "") else oauth_token.token
288
- )
289
  api = HfApi(token=api_token)
290
 
291
  dl_pattern = ["*.md", "*.json", "*.model"]
@@ -297,7 +311,7 @@ def process_model(
297
  )
298
  else "*.bin"
299
  )
300
- dl_pattern += [pattern]
301
 
302
  os.makedirs(downloads_dir, exist_ok=True)
303
  os.makedirs(outputs_dir, exist_ok=True)
@@ -306,10 +320,7 @@ def process_model(
306
  fp16 = str(Path(outdir) / f"{model_name}.fp16.gguf")
307
 
308
  with tempfile.TemporaryDirectory(dir=downloads_dir) as tmpdir:
309
- print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Start download")
310
- logger.info(
311
- datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Start download"
312
- )
313
  local_dir = Path(tmpdir) / model_name
314
  api.snapshot_download(
315
  repo_id=model_id,
@@ -323,17 +334,10 @@ def process_model(
323
  if os.path.exists(adapter_config_dir) and not os.path.exists(
324
  config_dir
325
  ):
326
- raise Exception(
327
  "adapter_config.json is present. If converting LoRA, use GGUF-my-lora."
328
  )
329
- print(
330
- datetime.now().strftime("%Y-%m-%d %H:%M:%S")
331
- + " Download successfully"
332
- )
333
- logger.info(
334
- datetime.now().strftime("%Y-%m-%d %H:%M:%S")
335
- + " Download successfully"
336
- )
337
 
338
  result = subprocess.run(
339
  [
@@ -348,15 +352,10 @@ def process_model(
348
  shell=False,
349
  capture_output=True,
350
  )
351
- print(
352
- datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Converted to f16"
353
- )
354
- logger.info(
355
- datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Converted to f16"
356
- )
357
 
358
  if result.returncode != 0:
359
- raise Exception(
360
  f"Error converting to fp16: {result.stderr.decode()}"
361
  )
362
  shutil.rmtree(downloads_dir)
@@ -369,7 +368,9 @@ def process_model(
369
  else "llama.cpp/groups_merged.txt"
370
  )
371
  if not os.path.isfile(train_data_path):
372
- raise Exception(f"Training data not found: {train_data_path}")
373
  generate_importance_matrix(fp16, train_data_path, imatrix_path)
374
 
375
  quant_methods = (
@@ -381,11 +382,7 @@ def process_model(
381
 
382
  gguf_files = []
383
  for method in quant_methods:
384
- print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Begin quantize")
385
- logger.info(
386
- datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Begin quantize"
387
- )
388
-
389
  name = (
390
  f"{model_name.lower()}-{method.lower()}-{suffix}.gguf"
391
  if suffix
@@ -406,18 +403,13 @@ def process_model(
406
  )
407
  result = subprocess.run(quant_cmd, shell=False, capture_output=True)
408
  if result.returncode != 0:
409
- raise Exception(
410
  f"Quantization failed ({method}): {result.stderr.decode()}"
411
  )
412
  size = os.path.getsize(path) / 1024 / 1024 / 1024
413
  gguf_files.append((name, path, size, method))
414
 
415
- print(
416
- datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Quantize successfully!"
417
- )
418
- logger.info(
419
- datetime.now().strftime("%Y-%m-%d %H:%M:%S") + " Quantize successfully!"
420
- )
421
 
422
  suffix_for_repo = (
423
  f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
@@ -428,13 +420,12 @@ def process_model(
428
  )
429
 
430
  try:
431
- card = ModelCard.load(model_id, token=oauth_token.token)
432
- except:
433
- card = ModelCard("")
434
- card.data.tags = (card.data.tags or []) + ["antigma", "quantize-my-repo"]
435
- card.data.base_model = model_id
436
- card.text = dedent(
437
- get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id)
438
  )
439
  readme_path = Path(outdir) / "README.md"
440
  card.save(readme_path)
@@ -478,6 +469,7 @@ def process_model(
478
  css = """/* Custom CSS to allow scrolling */
479
  .gradio-container {overflow-y: auto;}
480
  """
 
481
  model_id = HuggingfaceHubSearch(
482
  label="Hub Model ID",
483
  placeholder="Search for model id on Huggingface",
@@ -578,6 +570,7 @@ iface = gr.Interface(
578
  description="We take your Hugging Face repo — a terrific repo — we quantize it, we package it beautifully, and we give you your very own repo. It's smart. It's efficient. It's huge. You're gonna love it.",
579
  api_name=False,
580
  )
 
581
  with gr.Blocks(css=".gradio-container {overflow-y: auto;}") as demo:
582
  gr.Markdown("Logged in, you must be. Classy, secure, and victorious, it keeps us.")
583
  gr.LoginButton(min_width=250)
 
3
  import signal
4
  import tempfile
5
  from pathlib import Path
 
6
  import logging
7
  import gradio as gr
8
  from huggingface_hub import HfApi, ModelCard, whoami
 
31
  logger = logging.getLogger(__name__)
32
 
33
 
34
+ def get_llama_cpp_version():
35
  try:
36
  result = subprocess.run(
37
  ["git", "-C", "./llama.cpp", "describe", "--tags", "--always"],
 
41
  text=True,
42
  )
43
  version = result.stdout.strip().split("-")[0]
44
+ return version
45
  except subprocess.CalledProcessError as e:
46
+ logger.error("Error getting llama.cpp version: %s", e.stderr.strip())
47
  return None
48
 
49
 
50
+ def get_repo_namespace(repo_owner: str, username: str, user_orgs: list) -> str:
51
  if repo_owner == "self":
52
  return username
53
  for org in user_orgs:
 
66
  )
67
 
68
 
69
+ def toggle_repo_owner(export_to_org: bool, oauth_token: gr.OAuthToken | None) -> tuple:
70
  if oauth_token is None or oauth_token.token is None:
71
  raise gr.Error("You must be logged in to use quantize-my-repo")
72
  if not export_to_org:
 
80
  )
81
 
82
 
83
+ def generate_importance_matrix(
84
+ model_path: str, train_data_path: str, output_path: str
85
+ ) -> None:
86
  imatrix_command = [
87
  "./llama.cpp/llama-imatrix",
88
  "-m",
 
98
  ]
99
 
100
  if not os.path.isfile(model_path):
101
+ raise FileNotFoundError(f"Model file not found: {model_path}")
102
 
103
+ logger.info("Running imatrix command...")
104
  process = subprocess.Popen(imatrix_command, shell=False)
105
 
106
  try:
107
+ process.wait(timeout=60)
108
  except subprocess.TimeoutExpired:
109
+ logger.warning(
110
  "Imatrix computation timed out. Sending SIGINT to allow graceful termination..."
111
  )
112
  process.send_signal(signal.SIGINT)
113
  try:
114
+ process.wait(timeout=5)
115
  except subprocess.TimeoutExpired:
116
+ logger.error(
117
+ "Imatrix proc still didn't term. Forecfully terming process..."
118
+ )
119
  process.kill()
120
 
121
+ logger.info("Importance matrix generation completed.")
122
 
123
 
124
  def split_upload_model(
 
126
  outdir: str,
127
  repo_id: str,
128
  oauth_token: gr.OAuthToken | None,
129
+ split_max_tensors: int = 256,
130
+ split_max_size: str | None = None,
131
+ org_token: str | None = None,
132
+ export_to_org: bool = False,
133
+ ) -> None:
134
+ logger.info("Model path: %s", model_path)
135
+ logger.info("Output dir: %s", outdir)
136
 
137
  if oauth_token is None or oauth_token.token is None:
138
  raise ValueError("You have to be logged in.")
139
 
140
+ split_cmd = ["./llama.cpp/llama-gguf-split", "--split"]
141
  if split_max_size:
142
+ split_cmd.extend(["--split-max-size", split_max_size])
 
143
  else:
144
+ split_cmd.extend(["--split-max-tensors", str(split_max_tensors)])
 
145
 
146
+ model_path_prefix = ".".join(model_path.split(".")[:-1])
147
+ split_cmd.extend([model_path, model_path_prefix])
148
 
149
+ logger.info("Split command: %s", split_cmd)
150
 
151
  result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
152
+ logger.info("Split command stdout: %s", result.stdout)
153
+ logger.info("Split command stderr: %s", result.stderr)
154
 
155
  if result.returncode != 0:
156
+ raise RuntimeError(f"Error splitting the model: {result.stderr}")
157
+ logger.info("Model split successfully!")
 
158
 
 
159
  if os.path.exists(model_path):
160
  os.remove(model_path)
161
 
162
  model_file_prefix = model_path_prefix.split("/")[-1]
163
+ logger.info("Model file name prefix: %s", model_file_prefix)
164
  sharded_model_files = [
165
  f
166
  for f in os.listdir(outdir)
167
  if f.startswith(model_file_prefix) and f.endswith(".gguf")
168
  ]
169
+
170
+ if not sharded_model_files:
171
+ raise RuntimeError("No sharded files found.")
172
+
173
+ logger.info("Sharded model files: %s", sharded_model_files)
174
+ api = HfApi(token=org_token if (export_to_org and org_token) else oauth_token.token)
175
+
176
+ for file in sharded_model_files:
177
+ file_path = os.path.join(outdir, file)
178
+ logger.info("Uploading file: %s", file_path)
179
+ try:
180
+ api.upload_file(
181
+ path_or_fileobj=file_path,
182
+ path_in_repo=file,
183
+ repo_id=repo_id,
184
+ )
185
+ except Exception as e:
186
+ raise RuntimeError(f"Error uploading file {file_path}: {e}") from e
187
+
188
+ logger.info("Sharded model has been uploaded successfully!")
189
+
190
+
191
+ def get_new_model_card(
192
+ original_card: ModelCard,
193
+ original_model_id: str,
194
+ gguf_files: list,
195
+ new_repo_url: str,
196
+ split_model: bool,
197
+ ) -> ModelCard:
198
+ version = get_llama_cpp_version()
199
+ model_card = original_card.copy()
200
+ model_card.data.tags = (model_card.data.tags or []) + [
201
+ "antigma",
202
+ "quantize-my-repo",
203
+ ]
204
+
205
+ # Format the table rows
206
+ table_rows = []
207
+ for file_info in gguf_files:
208
+ name, _, size, method = file_info
209
+ if split_model:
210
+ display_name = name[:-5]
211
  else:
212
+ display_name = f"[{name}]({new_repo_url}/blob/main/{name})"
213
+ table_rows.append(f"{display_name}|{method}|{size:.2f} GB|{split_model}")
214
+
215
+ model_card.text = f"""
216
+ *Produced by [Antigma Labs](https://antigma.ai), [Antigma Quantize Space](https://huggingface.co/spaces/Antigma/quantize-my-repo)*
217
+
218
+ *Follow Antigma Labs on X [https://x.com/antigma_labs](https://x.com/antigma_labs)*
219
+
220
+ *Antigma's GitHub Homepage [https://github.com/AntigmaLabs](https://github.com/AntigmaLabs)*
221
+
222
+ ## Quantization Format (GGUF)
223
+ We use <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> release <a href="https://github.com/ggml-org/llama.cpp/releases/tag/{version}">{version}</a> for quantization.
224
+ Original model: https://huggingface.co/{original_model_id}
225
+
226
+ ## Download a file (not the whole branch) from below:
227
+ | Filename | Quant type | File Size | Split |
228
+ | -------- | ---------- | --------- | ----- |
229
+ | {'|'.join(table_rows)}
230
+
231
+ ## Original Model Card
232
+ {original_card.text}
233
+
234
+ ## Downloading using huggingface-cli
235
+ <details>
236
+ <summary>Click to view download instructions</summary>
237
+ First, make sure you have huggingface-cli installed:
238
 
239
+ ```
240
+ pip install -U "huggingface_hub[cli]"
241
+ ```
242
+
243
+ Then, you can target the specific file you want:
244
+
245
+ ```
246
+ huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}" --local-dir ./
247
+ ```
248
+
249
+ If the model is bigger than 50GB, it will have been split into multiple files. In order to download them all to a local folder, run:
250
+
251
+ ```
252
+ huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}/*" --local-dir ./
253
+ ```
254
+
255
+ You can either specify a new local-dir (e.g. deepseek-ai_DeepSeek-V3-0324-Q8_0), or the files will go to the default Hugging Face cache.
256
+
257
+ </details>
258
+ """
259
+ return model_card
260
 
261
 
262
  def process_model(
263
+ model_id: str,
264
+ q_method: str | list,
265
+ use_imatrix: bool,
266
+ imatrix_q_method: str,
267
+ private_repo: bool,
268
+ train_data_file: gr.File | None,
269
+ split_model: bool,
270
+ split_max_tensors: int,
271
+ split_max_size: str | None,
272
+ export_to_org: bool,
273
+ repo_owner: str,
274
+ org_token: str | None,
275
  oauth_token: gr.OAuthToken | None,
276
+ ) -> tuple[str, str]:
277
  if oauth_token is None or oauth_token.token is None:
278
  raise gr.Error("You must be logged in to use quantize-my-repo")
279
  try:
280
  whoami(oauth_token.token)
281
  except Exception as e:
282
+ raise gr.Error("You must be logged in to use quantize-my-repo") from e
283
 
284
  user_info = whoami(oauth_token.token)
285
  username = user_info["name"]
 
289
 
290
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
291
  logger.info(
292
+ "Time %s, Username %s, Model_ID %s, q_method %s",
293
+ current_time,
294
+ username,
295
+ model_id,
296
+ ",".join(q_method) if isinstance(q_method, list) else q_method,
297
  )
298
 
299
  repo_namespace = get_repo_namespace(repo_owner, username, user_orgs)
300
  model_name = model_id.split("/")[-1]
301
  try:
302
+ api_token = org_token if (export_to_org and org_token) else oauth_token.token
303
  api = HfApi(token=api_token)
304
 
305
  dl_pattern = ["*.md", "*.json", "*.model"]
 
311
  )
312
  else "*.bin"
313
  )
314
+ dl_pattern.append(pattern)
315
 
316
  os.makedirs(downloads_dir, exist_ok=True)
317
  os.makedirs(outputs_dir, exist_ok=True)
 
320
  fp16 = str(Path(outdir) / f"{model_name}.fp16.gguf")
321
 
322
  with tempfile.TemporaryDirectory(dir=downloads_dir) as tmpdir:
323
+ logger.info("Start download")
324
  local_dir = Path(tmpdir) / model_name
325
  api.snapshot_download(
326
  repo_id=model_id,
 
334
  if os.path.exists(adapter_config_dir) and not os.path.exists(
335
  config_dir
336
  ):
337
+ raise RuntimeError(
338
  "adapter_config.json is present. If converting LoRA, use GGUF-my-lora."
339
  )
340
+ logger.info("Download successfully")
341
 
342
  result = subprocess.run(
343
  [
 
352
  shell=False,
353
  capture_output=True,
354
  )
355
+ logger.info("Converted to f16")
356
 
357
  if result.returncode != 0:
358
+ raise RuntimeError(
359
  f"Error converting to fp16: {result.stderr.decode()}"
360
  )
361
  shutil.rmtree(downloads_dir)
 
368
  else "llama.cpp/groups_merged.txt"
369
  )
370
  if not os.path.isfile(train_data_path):
371
+ raise FileNotFoundError(
372
+ f"Training data not found: {train_data_path}"
373
+ )
374
  generate_importance_matrix(fp16, train_data_path, imatrix_path)
375
 
376
  quant_methods = (
 
382
 
383
  gguf_files = []
384
  for method in quant_methods:
385
+ logger.info("Begin quantize")
386
  name = (
387
  f"{model_name.lower()}-{method.lower()}-{suffix}.gguf"
388
  if suffix
 
403
  )
404
  result = subprocess.run(quant_cmd, shell=False, capture_output=True)
405
  if result.returncode != 0:
406
+ raise RuntimeError(
407
  f"Quantization failed ({method}): {result.stderr.decode()}"
408
  )
409
  size = os.path.getsize(path) / 1024 / 1024 / 1024
410
  gguf_files.append((name, path, size, method))
411
 
412
+ logger.info("Quantize successfully!")
413
 
414
  suffix_for_repo = (
415
  f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
 
420
  )
421
 
422
  try:
423
+ original_card = ModelCard.load(model_id, token=oauth_token.token)
424
+ except Exception:
425
+ original_card = ModelCard("")
426
+
427
+ card = get_new_model_card(
428
429
  )
430
  readme_path = Path(outdir) / "README.md"
431
  card.save(readme_path)
 
469
  css = """/* Custom CSS to allow scrolling */
470
  .gradio-container {overflow-y: auto;}
471
  """
472
+
473
  model_id = HuggingfaceHubSearch(
474
  label="Hub Model ID",
475
  placeholder="Search for model id on Huggingface",
 
570
  description="We take your Hugging Face repo — a terrific repo — we quantize it, we package it beautifully, and we give you your very own repo. It's smart. It's efficient. It's huge. You're gonna love it.",
571
  api_name=False,
572
  )
573
+
574
  with gr.Blocks(css=".gradio-container {overflow-y: auto;}") as demo:
575
  gr.Markdown("Logged in, you must be. Classy, secure, and victorious, it keeps us.")
576
  gr.LoginButton(min_width=250)