matrixportal committed · verified
Commit 7cee9ee · 1 parent: 50d13dc

Update app.py

Files changed (1): app.py (+33, -43)
app.py CHANGED
@@ -221,58 +221,48 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     if card.data.tags is None:
         card.data.tags = []
     card.data.tags.append("llama-cpp")
-    card.data.tags.append("gguf-my-repo")
     card.data.base_model = model_id
+
     card.text = dedent(
         f"""
         # {new_repo_id}
-        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
+        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id})
         Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
-
-        ## Use with llama.cpp
-        Install llama.cpp through brew (works on Mac and Linux)
-
-        ```bash
-        brew install llama.cpp
-
-        ```
-        Invoke the llama.cpp server or the CLI.
-
-        ### CLI:
-        ```bash
-        llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
-        ```
-
-        ### Server:
-        ```bash
-        llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
-        ```
-
-        Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
-
-        Step 1: Clone llama.cpp from GitHub.
-        ```
-        git clone https://github.com/ggerganov/llama.cpp
-        ```
-
-        Step 2: Move into the llama.cpp folder and build it with `LLAMA_CURL=1` flag along with other hardware-specific flags (for ex: LLAMA_CUDA=1 for Nvidia GPUs on Linux).
-        ```
-        cd llama.cpp && LLAMA_CURL=1 make
-        ```
-
-        Step 3: Run inference through the main binary.
-        ```
-        ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
-        ```
-        or
-        ```
-        ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
-        ```
         """
     )
     readme_path = Path(outdir)/"README.md"
     card.save(readme_path)
+
+    # Build the list of quantized-model download links
+    quant_list = f"""
+## ✅ Quantized Models Download List
+**✨ Recommended for CPU:** `Q4_K_M` | **⚡ Recommended for ARM CPU:** `Q4_0` | **🏆 Best Quality:** `Q8_0`
+
+| 🚀 Download | 🔢 Type | 📝 Notes |
+|:---------|:-----|:------|
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q2_k.gguf) | ![Q2_K](https://img.shields.io/badge/Q2_K-1A73E8) | Basic quantization |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q3_k_s.gguf) | ![Q3_K_S](https://img.shields.io/badge/Q3_K_S-34A853) | Small size |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q3_k_m.gguf) | ![Q3_K_M](https://img.shields.io/badge/Q3_K_M-FBBC05) | Balanced quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q3_k_l.gguf) | ![Q3_K_L](https://img.shields.io/badge/Q3_K_L-4285F4) | Better quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q4_0.gguf) | ![Q4_0](https://img.shields.io/badge/Q4_0-EA4335) | Fast on ARM |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q4_k_s.gguf) | ![Q4_K_S](https://img.shields.io/badge/Q4_K_S-673AB7) | Fast, recommended |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q4_k_m.gguf) | ![Q4_K_M](https://img.shields.io/badge/Q4_K_M-673AB7) ⭐ | Best balance |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q5_0.gguf) | ![Q5_0](https://img.shields.io/badge/Q5_0-FF6D01) | Good quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q5_k_s.gguf) | ![Q5_K_S](https://img.shields.io/badge/Q5_K_S-0F9D58) | Balanced |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q5_k_m.gguf) | ![Q5_K_M](https://img.shields.io/badge/Q5_K_M-0F9D58) | High quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q6_k.gguf) | ![Q6_K](https://img.shields.io/badge/Q6_K-4285F4) 🏆 | Very good quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q8_0.gguf) | ![Q8_0](https://img.shields.io/badge/Q8_0-EA4335) ⚡ | Fast, best quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-f16.gguf) | ![F16](https://img.shields.io/badge/F16-000000) | Maximum accuracy |
+
+💡 **Tip:** Use `F16` for maximum precision when quality is critical
+"""
+
+    # Update the README (using ModelCard)
+    card.text += quant_list
+    readme_path = Path(outdir)/"README.md"
+    card.save(readme_path)
+
     if split_model:
         split_upload_model(str(quantized_gguf_path), outdir, new_repo_id, oauth_token, split_max_tensors, split_max_size)
     else:
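Each row in the quant table added above resolves to a plain `https://huggingface.co/<repo>/resolve/main/<file>` URL, so the artifacts can be fetched programmatically as well as by clicking. A minimal sketch using `huggingface_hub`; the repo id and filename are hypothetical stand-ins for whatever `new_repo_id` and `model_name.lower()` produce at runtime:

```python
# Sketch: download one quant listed in the generated table.
# The repo id and filename below are hypothetical placeholders.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="matrixportal/SomeModel-GGUF",  # hypothetical new_repo_id
    filename="somemodel-q4_k_m.gguf",       # hypothetical table entry
)
print(local_path)  # local cache path of the downloaded GGUF file
```

`hf_hub_download` resolves the same `resolve/main` endpoint the table links to and caches the file locally instead of re-downloading it each run.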
@@ -424,7 +414,7 @@ with gr.Blocks(css=css) as demo:
     )
 
 def restart_space():
-    HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
+    HfApi().restart_space(repo_id="matrixportal/all-gguf-same-where", token=HF_TOKEN, factory_reboot=True)
 
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=21600)
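This second hunk repoints the six-hourly restart at the fork's own Space. As a standalone sketch of the same keep-alive pattern, assuming a write-scoped `HF_TOKEN` is set in the environment and the Space id exists:

```python
# Sketch of app.py's keep-alive: factory-reboot the Space every 6 hours.
import os

from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

HF_TOKEN = os.environ["HF_TOKEN"]  # assumes a write token is configured

def restart_space():
    # factory_reboot=True rebuilds the Space image instead of a soft restart
    HfApi().restart_space(
        repo_id="matrixportal/all-gguf-same-where",
        token=HF_TOKEN,
        factory_reboot=True,
    )

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)  # 6 hours
scheduler.start()  # the long-lived Gradio process keeps the job thread alive
```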
 
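Since the commit drops the llama.cpp CLI walkthrough from the generated card, one possible consumer-side path is llama-cpp-python's `Llama.from_pretrained`; a hedged sketch with hypothetical repo and file names:

```python
# Sketch: load a quant produced by this Space via llama-cpp-python.
# The repo id is hypothetical; the Q4_K_M glob matches the table's
# "Recommended for CPU" pick.
from llama_cpp import Llama  # pip install llama-cpp-python

llm = Llama.from_pretrained(
    repo_id="matrixportal/SomeModel-GGUF",  # hypothetical
    filename="*q4_k_m.gguf",                # glob for the Q4_K_M file
)
out = llm("The meaning to life and the universe is", max_tokens=32)
print(out["choices"][0]["text"])
```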