Brianpuz committed
Commit 3ea29f2 · verified · 1 Parent(s): f9cd238

Update app.py

Add file logging under /data/logs, generate the model card via a new get_llama_cpp_notes() helper (llama.cpp release tag, prompt format, and a per-file download table), record each quant's file size, wrap the conversion/quantization pipeline in a try/except that reports errors in the UI, and drop the Glass theme from the Gradio Blocks container.

Files changed (1): app.py (+137 −87)
app.py CHANGED
@@ -14,8 +14,67 @@ import numpy as np
 
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 CONVERSION_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"
+
+log_dir = "/data/logs"
+os.makedirs(log_dir, exist_ok=True)
+
+logging.basicConfig(
+    filename=os.path.join(log_dir, "app.log"),
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
 logger = logging.getLogger(__name__)
 
+def get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id=None):
+    try:
+        result = subprocess.run(
+            ['git', '-C', './llama.cpp', 'describe', '--tags', '--always'],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            check=True,
+            text=True
+        )
+        version = result.stdout.strip().split('-')[0]
+        text = f"""
+*Produced by [Antigma Labs](https://antigma.ai)*
+## llama.cpp quantization
+Using <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> release <a href="https://github.com/ggml-org/llama.cpp/releases/tag/{version}">{version}</a> for quantization.
+Original model: https://huggingface.co/{model_id}
+Run them directly with [llama.cpp](https://github.com/ggml-org/llama.cpp), or any other llama.cpp-based project.
+## Prompt format
+```
+<|begin▁of▁sentence|>{{system_prompt}}<|User|>{{prompt}}<|Assistant|><|end▁of▁sentence|><|Assistant|>
+```
+## Download a file (not the whole branch) from below:
+| Filename | Quant type | File Size | Split |
+| -------- | ---------- | --------- | ----- |
+| {'|'.join(['|'.join([gguf_files[i][0][:-5] if split_model else ('['+gguf_files[i][0]+']'+'(' + new_repo_url+'/blob/main/'+gguf_files[i][0] + ')'), gguf_files[i][3], f"{gguf_files[i][2]:.2f}" + ' GB', str(split_model),'''
+''']) for i in range(len(gguf_files))]) }
+## Downloading using huggingface-cli
+<details>
+<summary>Click to view download instructions</summary>
+First, make sure you have huggingface-cli installed:
+```
+pip install -U "huggingface_hub[cli]"
+```
+Then, you can target the specific file you want:
+```
+huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}" --local-dir ./
+```
+If the model is bigger than 50GB, it will have been split into multiple files. In order to download them all to a local folder, run:
+```
+huggingface-cli download {new_repo_url} --include "{gguf_files[0][0]}/*" --local-dir ./
+```
+You can either specify a new local-dir (deepseek-ai_DeepSeek-V3-0324-Q8_0) or download them all in place (./)
+</details>
+"""
+        return text
+    except subprocess.CalledProcessError as e:
+        print("Error:", e.stderr.strip())
+        return None
+
+
 def get_repo_namespace(repo_owner, username, user_orgs):
     if repo_owner == 'self':
         return username
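Two details in the new helper are worth unpacking. First, the `version` parsing relies on the shape of `git describe --tags --always` output: a bare tag (e.g. `b4944`), `tag-N-g<sha>` when commits follow the tag, or a bare abbreviated hash when no tag is reachable, so taking the first `-`-separated field recovers the release tag in the first two cases. Second, the one-line download-table expression is dense; below is a minimal standalone sketch of the same row-building logic. The function name is hypothetical, the `(name, path, size_gb, method)` tuple layout matches what `process_model` appends further down, and the sketch also adds the leading `|` that rows after the first currently lack in the generated markdown:

```python
# Hypothetical re-statement of the table builder in get_llama_cpp_notes,
# shown only to make the row logic easier to follow.
def build_download_table(gguf_files, new_repo_url, split_model):
    lines = [
        "| Filename | Quant type | File Size | Split |",
        "| -------- | ---------- | --------- | ----- |",
    ]
    for name, _path, size_gb, method in gguf_files:
        # Split uploads go up as a directory of shards, so only the bare name
        # (minus ".gguf") is shown; single files get a direct link into the
        # repo's main branch.
        shown = name[:-5] if split_model else f"[{name}]({new_repo_url}/blob/main/{name})"
        lines.append(f"| {shown} | {method} | {size_gb:.2f} GB | {split_model} |")
    return "\n".join(lines)
```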
@@ -141,94 +200,86 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
 
 
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    print(f"Time {current_time}, Username {username}, Model_ID, {model_id}, q_method {','.join(q_method)}")
+    logger.info(f"Time {current_time}, Username {username}, Model_ID {model_id}, q_method {','.join(q_method)}")
 
     repo_namespace = get_repo_namespace(repo_owner, username, user_orgs)
     model_name = model_id.split('/')[-1]
-    api_token = org_token if (export_to_org and org_token!="") else oauth_token.token
-    api = HfApi(token=api_token)
-
-    dl_pattern = ["*.md", "*.json", "*.model"]
-    pattern = "*.safetensors" if any(
-        f.path.endswith(".safetensors")
-        for f in api.list_repo_tree(repo_id=model_id, recursive=True)
-    ) else "*.bin"
-    dl_pattern += [pattern]
-
-    os.makedirs("downloads", exist_ok=True)
-    os.makedirs("outputs", exist_ok=True)
-
-    with tempfile.TemporaryDirectory(dir="outputs") as outdir:
-        fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
-
-        with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
-            local_dir = Path(tmpdir)/model_name
-            api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
-
-            config_dir = local_dir/"config.json"
-            adapter_config_dir = local_dir/"adapter_config.json"
-            if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
-                raise Exception("adapter_config.json is present. If converting LoRA, use GGUF-my-lora.")
-
-            result = subprocess.run(["python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16], shell=False, capture_output=True)
-            if result.returncode != 0:
-                raise Exception(f"Error converting to fp16: {result.stderr.decode()}")
-
-            imatrix_path = Path(outdir)/"imatrix.dat"
-            if use_imatrix:
-                train_data_path = train_data_file.name if train_data_file else "llama.cpp/groups_merged.txt"
-                if not os.path.isfile(train_data_path):
-                    raise Exception(f"Training data not found: {train_data_path}")
-                generate_importance_matrix(fp16, train_data_path, imatrix_path)
-
-            quant_methods = [imatrix_q_method] if use_imatrix else (q_method if isinstance(q_method, list) else [q_method])
-            suffix = "imat" if use_imatrix else None
-
-            gguf_files = []
-            for method in quant_methods:
-                name = f"{model_name.lower()}-{method.lower()}-{suffix}.gguf" if suffix else f"{model_name.lower()}-{method.lower()}.gguf"
-                path = str(Path(outdir)/name)
-                quant_cmd = ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path, fp16, path, method] if use_imatrix else ["./llama.cpp/llama-quantize", fp16, path, method]
-                result = subprocess.run(quant_cmd, shell=False, capture_output=True)
-                if result.returncode != 0:
-                    raise Exception(f"Quantization failed ({method}): {result.stderr.decode()}")
-                gguf_files.append((name, path))
-
-            suffix_for_repo = f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
-            repo_id = f"{repo_namespace}/{model_name}-{suffix_for_repo}-GGUF"
-            new_repo_url = api.create_repo(repo_id=repo_id, exist_ok=True, private=private_repo)
-
-            try:
-                card = ModelCard.load(model_id, token=oauth_token.token)
-            except:
-                card = ModelCard("")
-            card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
-            card.data.base_model = model_id
-            card.text = dedent(f"""
-                # {repo_id}
-                Absolutely tremendous! This repo features **GGUF quantized** versions of [{model_id}](https://huggingface.co/{model_id}) — made possible using the *very powerful* `llama.cpp`. Believe me, it's fast, it's smart, it's winning.
-                ## Quantized Versions:
-                Only the best quantization. You'll love it.
-                ## Run with llama.cpp
-                Just plug it in, hit the command line, and boom — you're running world-class AI, folks:
-                ```bash
-                llama-cli --hf-repo {repo_id} --hf-file {gguf_files[0][0]} -p "AI First, but also..."
-                ```
-                This beautiful Hugging Face Space was brought to you by the **amazing team at [Antigma Labs](https://antigma.ai)**. Great people. Big vision. Doing things that matter — and doing them right.
-                Total winners.
-            """)
-            readme_path = Path(outdir)/"README.md"
-            card.save(readme_path)
-            for name, path in gguf_files:
-                if split_model:
-                    split_upload_model(path, outdir, repo_id, oauth_token, split_max_tensors, split_max_size, org_token, export_to_org)
-                else:
-                    api.upload_file(path_or_fileobj=path, path_in_repo=name, repo_id=repo_id)
-            if use_imatrix and os.path.isfile(imatrix_path):
-                api.upload_file(path_or_fileobj=imatrix_path, path_in_repo="imatrix.dat", repo_id=repo_id)
-            api.upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id)
-
-    return (f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>', f"llama{np.random.randint(9)}.png")
+    try:
+        api_token = org_token if (export_to_org and org_token!="") else oauth_token.token
+        api = HfApi(token=api_token)
+
+        dl_pattern = ["*.md", "*.json", "*.model"]
+        pattern = "*.safetensors" if any(
+            f.path.endswith(".safetensors")
+            for f in api.list_repo_tree(repo_id=model_id, recursive=True)
+        ) else "*.bin"
+        dl_pattern += [pattern]
+
+        os.makedirs("downloads", exist_ok=True)
+        os.makedirs("outputs", exist_ok=True)
+
+        with tempfile.TemporaryDirectory(dir="outputs") as outdir:
+            fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
+
+            with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
+                local_dir = Path(tmpdir)/model_name
+                api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+
+                config_dir = local_dir/"config.json"
+                adapter_config_dir = local_dir/"adapter_config.json"
+                if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+                    raise Exception("adapter_config.json is present. If converting LoRA, use GGUF-my-lora.")
+
+                result = subprocess.run(["python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16], shell=False, capture_output=True)
+                if result.returncode != 0:
+                    raise Exception(f"Error converting to fp16: {result.stderr.decode()}")
+
+                imatrix_path = Path(outdir)/"imatrix.dat"
+                if use_imatrix:
+                    train_data_path = train_data_file.name if train_data_file else "llama.cpp/groups_merged.txt"
+                    if not os.path.isfile(train_data_path):
+                        raise Exception(f"Training data not found: {train_data_path}")
+                    generate_importance_matrix(fp16, train_data_path, imatrix_path)
+
+                quant_methods = [imatrix_q_method] if use_imatrix else (q_method if isinstance(q_method, list) else [q_method])
+                suffix = "imat" if use_imatrix else None
+
+                gguf_files = []
+                for method in quant_methods:
+                    name = f"{model_name.lower()}-{method.lower()}-{suffix}.gguf" if suffix else f"{model_name.lower()}-{method.lower()}.gguf"
+                    path = str(Path(outdir)/name)
+                    quant_cmd = ["./llama.cpp/llama-quantize", "--imatrix", imatrix_path, fp16, path, method] if use_imatrix else ["./llama.cpp/llama-quantize", fp16, path, method]
+                    result = subprocess.run(quant_cmd, shell=False, capture_output=True)
+                    if result.returncode != 0:
+                        raise Exception(f"Quantization failed ({method}): {result.stderr.decode()}")
+                    size = os.path.getsize(path)/1024/1024/1024
+                    gguf_files.append((name, path, size, method))
+
+                suffix_for_repo = f"{imatrix_q_method}-imat" if use_imatrix else "-".join(quant_methods)
+                repo_id = f"{repo_namespace}/{model_name}-{suffix_for_repo}-GGUF"
+                new_repo_url = api.create_repo(repo_id=repo_id, exist_ok=True, private=private_repo)
+
+                try:
+                    card = ModelCard.load(model_id, token=oauth_token.token)
+                except:
+                    card = ModelCard("")
+                card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
+                card.data.base_model = model_id
+                card.text = dedent(get_llama_cpp_notes(gguf_files, new_repo_url, split_model, model_id))
+                readme_path = Path(outdir)/"README.md"
+                card.save(readme_path)
+                for name, path, _, _ in gguf_files:
+                    if split_model:
+                        split_upload_model(path, outdir, repo_id, oauth_token, split_max_tensors, split_max_size, org_token, export_to_org)
+                    else:
+                        api.upload_file(path_or_fileobj=path, path_in_repo=name, repo_id=repo_id)
+                if use_imatrix and os.path.isfile(imatrix_path):
+                    api.upload_file(path_or_fileobj=imatrix_path, path_in_repo="imatrix.dat", repo_id=repo_id)
+                api.upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id)
+
+                return (f'<h1>✅ DONE</h1><br/>Repo: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>', f"llama{np.random.randint(9)}.png")
+    except Exception as e:
+        return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")
 
 
 css="""/* Custom CSS to allow scrolling */
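A note on the new error path: the committed code wrote `raise (html, "error.png")`, which would throw a TypeError since a tuple is not an exception, so it is corrected above to `return`, matching the success branch's `(html_status, image_filename)` shape. That shape implies the function feeds a Gradio interface with two outputs; a minimal sketch of that contract follows, where the component choices are assumptions since the actual `gr.Interface(...)` output spec sits outside this diff's hunks. The `escape(...)` call also presumes `from html import escape` (or equivalent) among the imports above the first hunk.

```python
import gradio as gr

def fake_process(ok: bool):
    # Mirrors process_model's return shape: (html_status, image_filename).
    if ok:
        return "<h1>✅ DONE</h1>", "llama0.png"
    return "<h1>❌ ERROR</h1>", "error.png"

# Hypothetical wiring: two outputs, one HTML status and one image.
demo = gr.Interface(fn=fake_process, inputs=gr.Checkbox(), outputs=[gr.HTML(), gr.Image()])
```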
@@ -339,7 +390,7 @@ iface = gr.Interface(
     description="We take your Hugging Face repo — a terrific repo — we quantize it, we package it beautifully, and we give you your very own repo. It's smart. It's efficient. It's huge. You're gonna love it.",
     api_name=False
 )
-with gr.Blocks(css=".gradio-container {overflow-y: auto;}",theme=gr.themes.Glass()) as demo:
+with gr.Blocks(css=".gradio-container {overflow-y: auto;}") as demo:
     gr.Markdown("Logged in, you must be. Classy, secure, and victorious, it keeps us.")
     gr.LoginButton(min_width=250)
 
@@ -353,7 +404,6 @@ with gr.Blocks(css=".gradio-container {overflow-y: auto;}",theme=gr.themes.Glass
     iface.render()
 
 
-
 def restart_space():
     HfApi().restart_space(repo_id="Antigma/quantize-my-repo", token=HF_TOKEN, factory_reboot=True)
 
409