Brianpuz committed
Commit 3ea29f2 · verified · 1 Parent(s): f9cd238

Update app.py

Add file logging under /data/logs, generate the model card via a new get_llama_cpp_notes() helper (llama.cpp release tag, prompt format, and a per-file download table), record each quant's file size, wrap the conversion/quantization pipeline in a try/except that reports errors in the UI, and drop the Glass theme from the Gradio Blocks container.

Files changed (1): app.py (+137 −87)
app.py CHANGED
@@ -14,8 +14,67 @@ import numpy as np
 
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 CONVERSION_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"
+
+log_dir = "/data/logs"
+os.makedirs(log_dir, exist_ok=True)
+
+logging.basicConfig(
+    filename=os.path.join(log_dir, "app.log"),
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
 logger = logging.getLogger(__name__)
 
+def get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id=None):
+    try:
+        result = subprocess.run(
+            ['git', '-C', './llama.cpp', 'describe', '--tags', '--always'],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            check=True,
+            text=True
+        )
+        version = result.stdout.strip().split('-')[0]
+        text = f"""
+*Produced by [Antigma Labs](https://antigma.ai)*
+## llama.cpp quantization
+Using <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> release <a href="https://github.com/ggml-org/llama.cpp/releases/tag/{version}">{version}</a> for quantization.
+Original model: https://huggingface.co/{model_id}
+Run them directly with [llama.cpp](https://github.com/ggml-org/llama.cpp), or any other llama.cpp-based project.
+## Prompt format
+```
+<|begin▁of▁sentence|>{{system_prompt}}<|User|>{{prompt}}<|Assistant|><|end▁of▁sentence|><|Assistant|>
+```
+## Download a file (not the whole branch) from below:
+| Filename | Quant type | File Size | Split |
+| -------- | ---------- | --------- | ----- |
+| {'|'.join(['|'.join([gguf_files[i][0][:-5] if split_model else ('['+gguf_files[i][0]+']'+'(' + new_repo_url+'/blob/main/'+gguf_files[i][0] + ')'), gguf_files[i][3], f"{gguf_files[i][2]:.2f}" + ' GB', str(split_model),'''
+''']) for i in range(len(gguf_files))]) }
+## Downloading using huggingface-cli
+<details>
+<summary>Click to view download instructions</summary>
+First, make sure you have huggingface-cli installed:
+```
+pip install -U "huggingface_hub[cli]"
+```
+Then, you can target the specific file you want:
+```
+huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}" --local-dir ./
+```
+If the model is bigger than 50GB, it will have been split into multiple files. In order to download them all to a local folder, run:
+```
+huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}/*" --local-dir ./
+```
+You can either specify a new local-dir (deepseek-ai_DeepSeek-V3-0324-Q8_0) or download them all in place (./)
+</details>
+"""
+        return text
+    except subprocess.CalledProcessError as e:
+        print("Error:", e.stderr.strip())
+        return None
+
+
 def get_repo_namespace(repo_owner, username, user_orgs):
     if repo_owner == 'self':
         return username
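Two details in the new helper are worth unpacking. First, the `version` parsing relies on the shape of `git describe --tags --always` output: a bare tag (e.g. `b4944`), `tag-N-g<sha>` when commits follow the tag, or a bare abbreviated hash when no tag is reachable, so taking the first `-`-separated field recovers the release tag in the first two cases. Second, the one-line download-table expression is dense; below is a minimal standalone sketch of the same row-building logic. The function name is hypothetical, the `(name, path, size_gb, method)` tuple layout matches what `process_model` appends further down, and the sketch also adds the leading `|` that rows after the first currently lack in the generated markdown:

```python
# Hypothetical re-statement of the table builder in get_llama_cpp_notes,
# shown only to make the row logic easier to follow.
def build_download_table(gguf_files, new_repo_url, split_model):
    lines = [
        "| Filename | Quant type | File Size | Split |",
        "| -------- | ---------- | --------- | ----- |",
    ]
    for name, _path, size_gb, method in gguf_files:
        # Split uploads go up as a directory of shards, so only the bare name
        # (minus ".gguf") is shown; single files get a direct link into the
        # repo's main branch.
        shown = name[:-5] if split_model else f"[{name}]({new_repo_url}/blob/main/{name})"
        lines.append(f"| {shown} | {method} | {size_gb:.2f} GB | {split_model} |")
    return "\n".join(lines)
```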
@@ -141,94 +200,86 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 
 
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    print(f"Time {current_time}, Username {username}, Model_ID, {model_id}, q_method {','.join(q_method)}")
+    logger.info(f"Time {current_time}, Username {username}, Model_ID {model_id}, q_method {','.join(q_method)}")
 
     repo_namespace = get_repo_namespace(repo_owner, username, user_orgs)
     model_name = model_id.split('/')[-1]
-    api_token = org_token if (export_to_org and org_token!="") else oauth_token.token
-    api = HfApi(token=api_token)
-
-    dl_pattern = ["*.md", "*.json", "*.model"]
-    pattern = "*.safetensors" if any(
-        f.path.endswith(".safetensors")
-        for f in api.list_repo_tree(repo_id=model_id, recursive=True)
-    ) else "*.bin"
-    dl_pattern += [pattern]
-
-    os.makedirs("downloads", exist_ok=True)
-    os.makedirs("outputs", exist_ok=True)
-
-    with tempfile.TemporaryDirectory(dir="outputs") as outdir:
-        fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
-
-        with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
-            local_dir = Path(tmpdir)/model_name
-            api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
-
-            config_dir = local_dir/"config.json"
-            adapter_config_dir = local_dir/"adapter_config.json"
-            if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
-                raise Exception("adapter_config.json is present. If converting LoRA, use GGUF-my-lora.")
-
-            result = subprocess.run(["python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16], shell=False, capture_output=True)
-            if result.returncode != 0:
-                raise Exception(f"Error converting to fp16: {result.stderr.decode()}")
-
-            imatrix_path = Path(outdir)/"imatrix.dat"
-            if use_imatrix:
-                train_data_path = train_data_file.name if train_data_file else "llama.cpp/groups_merged.txt"
-                if not os.path.isfile(train_data_path):
-                    raise Exception(f"Training data not found: {train_data_path}")
-                generate_importance_matrix(fp16, train_data_path, imatrix_path)
-
-            quant_methods = [imatrix_q_method] if use_imatrix else (q_method if isinstance(q_method, list) else [q_method])
-            suffix = "imat" if use_imatrix else None
-
-            gguf_files = []
-            for method in quant_methods:
-                name = f"{model_name.lower()}-{method.lower()}-{suffix}.gguf" if suffix else f"{model_name.lower()}-{method.lower()}.gguf"
-                path = str(Path(outdir)/name)
-                quant_cmd = ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path, fp16, path, method] if use_imatrix else ["./llama.cpp/llama-quantize", fp16, path, method]
-                result = subprocess.run(quant_cmd, shell=False, capture_output=True)
-                if result.returncode != 0:
-                    raise Exception(f"Quantization failed ({method}): {result.stderr.decode()}")
-                gguf_files.append((name, path))
-
-            suffix_for_repo = f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
-            repo_id = f"{repo_namespace}/{model_name}-{suffix_for_repo}-GGUF"
-            new_repo_url = api.create_repo(repo_id=repo_id, exist_ok=True, private=private_repo)
-
-            try:
-                card = ModelCard.load(model_id, token=oauth_token.token)
-            except:
-                card = ModelCard("")
-            card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
-            card.data.base_model = model_id
-            card.text = dedent(f"""
-                # {repo_id}
-                Absolutely tremendous! This repo features **GGUF quantized** versions of [{model_id}](https://huggingface.co/{model_id}) — made possible using the *very powerful* `llama.cpp`. Believe me, it's fast, it's smart, it's winning.
-                ## Quantized Versions:
-                Only the best quantization. You'll love it.
-                ## Run with llama.cpp
-                Just plug it in, hit the command line, and boom — you're running world-class AI, folks:
-                ```bash
-                llama-cli --hf-repo {repo_id} --hf-file {gguf_files[0][0]} -p "AI First, but also..."
-                ```
-                This beautiful Hugging Face Space was brought to you by the **amazing team at [Antigma Labs](https://antigma.ai)**. Great people. Big vision. Doing things that matter — and doing them right.
-                Total winners.
-            """)
-            readme_path = Path(outdir)/"README.md"
-            card.save(readme_path)
-            for name, path in gguf_files:
-                if split_model:
-                    split_upload_model(path, outdir, repo_id, oauth_token, split_max_tensors, split_max_size, org_token, export_to_org)
-                else:
-                    api.upload_file(path_or_fileobj=path, path_in_repo=name, repo_id=repo_id)
-            if use_imatrix and os.path.isfile(imatrix_path):
-                api.upload_file(path_or_fileobj=imatrix_path, path_in_repo="imatrix.dat", repo_id=repo_id)
-            api.upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id)
-
-    return (f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>', f"llama{np.random.randint(9)}.png")
+    try:
+        api_token = org_token if (export_to_org and org_token!="") else oauth_token.token
+        api = HfApi(token=api_token)
+
+        dl_pattern = ["*.md", "*.json", "*.model"]
+        pattern = "*.safetensors" if any(
+            f.path.endswith(".safetensors")
+            for f in api.list_repo_tree(repo_id=model_id, recursive=True)
+        ) else "*.bin"
+        dl_pattern += [pattern]
+
+        os.makedirs("downloads", exist_ok=True)
+        os.makedirs("outputs", exist_ok=True)
+
+        with tempfile.TemporaryDirectory(dir="outputs") as outdir:
+            fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
+
+            with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
+                local_dir = Path(tmpdir)/model_name
+                api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+
+                config_dir = local_dir/"config.json"
+                adapter_config_dir = local_dir/"adapter_config.json"
+                if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+                    raise Exception("adapter_config.json is present. If converting LoRA, use GGUF-my-lora.")
+
+                result = subprocess.run(["python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16], shell=False, capture_output=True)
+                if result.returncode != 0:
+                    raise Exception(f"Error converting to fp16: {result.stderr.decode()}")
+
+                imatrix_path = Path(outdir)/"imatrix.dat"
+                if use_imatrix:
+                    train_data_path = train_data_file.name if train_data_file else "llama.cpp/groups_merged.txt"
+                    if not os.path.isfile(train_data_path):
+                        raise Exception(f"Training data not found: {train_data_path}")
+                    generate_importance_matrix(fp16, train_data_path, imatrix_path)
+
+                quant_methods = [imatrix_q_method] if use_imatrix else (q_method if isinstance(q_method, list) else [q_method])
+                suffix = "imat" if use_imatrix else None
+
+                gguf_files = []
+                for method in quant_methods:
+                    name = f"{model_name.lower()}-{method.lower()}-{suffix}.gguf" if suffix else f"{model_name.lower()}-{method.lower()}.gguf"
+                    path = str(Path(outdir)/name)
+                    quant_cmd = ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path, fp16, path, method] if use_imatrix else ["./llama.cpp/llama-quantize", fp16, path, method]
+                    result = subprocess.run(quant_cmd, shell=False, capture_output=True)
+                    if result.returncode != 0:
+                        raise Exception(f"Quantization failed ({method}): {result.stderr.decode()}")
+                    size = os.path.getsize(path)/1024/1024/1024
+                    gguf_files.append((name, path, size, method))
+
+                suffix_for_repo = f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
+                repo_id = f"{repo_namespace}/{model_name}-{suffix_for_repo}-GGUF"
+                new_repo_url = api.create_repo(repo_id=repo_id, exist_ok=True, private=private_repo)
+
+                try:
+                    card = ModelCard.load(model_id, token=oauth_token.token)
+                except:
+                    card = ModelCard("")
+                card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
+                card.data.base_model = model_id
+                card.text = dedent(get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id))
+                readme_path = Path(outdir)/"README.md"
+                card.save(readme_path)
+                for name, path, _, _ in gguf_files:
+                    if split_model:
+                        split_upload_model(path, outdir, repo_id, oauth_token, split_max_tensors, split_max_size, org_token, export_to_org)
+                    else:
+                        api.upload_file(path_or_fileobj=path, path_in_repo=name, repo_id=repo_id)
+                if use_imatrix and os.path.isfile(imatrix_path):
+                    api.upload_file(path_or_fileobj=imatrix_path, path_in_repo="imatrix.dat", repo_id=repo_id)
+                api.upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id)
+
+                return (f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>', f"llama{np.random.randint(9)}.png")
+    except Exception as e:
+        return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")
 
 
 css="""/* Custom CSS to allow scrolling */
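A note on the new error path: the committed code wrote `raise (html, "error.png")`, which would throw a TypeError since a tuple is not an exception, so it is corrected above to `return`, matching the success branch's `(html_status, image_filename)` shape. That shape implies the function feeds a Gradio interface with two outputs; a minimal sketch of that contract follows, where the component choices are assumptions since the actual `gr.Interface(...)` output spec sits outside this diff's hunks. The `escape(...)` call also presumes `from html import escape` (or equivalent) among the imports above the first hunk.

```python
import gradio as gr

def fake_process(ok: bool):
    # Mirrors process_model's return shape: (html_status, image_filename).
    if ok:
        return "<h1>✅ DONE</h1>", "llama0.png"
    return "<h1>❌ ERROR</h1>", "error.png"

# Hypothetical wiring: two outputs, one HTML status and one image.
demo = gr.Interface(fn=fake_process, inputs=gr.Checkbox(), outputs=[gr.HTML(), gr.Image()])
```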
@@ -339,7 +390,7 @@ iface = gr.Interface(
     description="We take your Hugging Face repo — a terrific repo — we quantize it, we package it beautifully, and we give you your very own repo. It's smart. It's efficient. It's huge. You're gonna love it.",
     api_name=False
 )
-with gr.Blocks(css=".gradio-container {overflow-y: auto;}",theme=gr.themes.Glass()) as demo:
+with gr.Blocks(css=".gradio-container {overflow-y: auto;}") as demo:
     gr.Markdown("Logged in, you must be. Classy, secure, and victorious, it keeps us.")
     gr.LoginButton(min_width=250)
 
@@ -353,7 +404,6 @@ with gr.Blocks(css=".gradio-container {overflow-y: auto;}",theme=gr.themes.Glass
     iface.render()
 
 
-
 def restart_space():
     HfApi().restart_space(repo_id="Antigma/quantize-my-repo", token=HF_TOKEN, factory_reboot=True)
 
409