Update app.py

app.py CHANGED
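This commit does two things: it strips the stock gguf-my-repo README template (the brew install / llama-cli / llama-server instructions) down to a short header and appends a per-quant download table to the generated model card, and it retargets the scheduled automatic restart at this fork's own Space, matrixportal/all-gguf-same-where.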
````diff
@@ -221,58 +221,48 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
     if card.data.tags is None:
         card.data.tags = []
     card.data.tags.append("llama-cpp")
-    card.data.tags.append("gguf-my-repo")
     card.data.base_model = model_id
+
     card.text = dedent(
         f"""
         # {new_repo_id}
-        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id})
+        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id})
         Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
-
-        ## Use with llama.cpp
-        Install llama.cpp through brew (works on Mac and Linux)
-
-        ```bash
-        brew install llama.cpp
-
-        ```
-        Invoke the llama.cpp server or the CLI.
-
-        ### CLI:
-        ```bash
-        llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
-        ```
-
-        ### Server:
-        ```bash
-        llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
-        ```
-
-        Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
-
-        Step 1: Clone llama.cpp from GitHub.
-        ```
-        git clone https://github.com/ggerganov/llama.cpp
-        ```
-
-        Step 2: Move into the llama.cpp folder and build it with `LLAMA_CURL=1` flag along with other hardware-specific flags (for ex: LLAMA_CUDA=1 for Nvidia GPUs on Linux).
-        ```
-        cd llama.cpp && LLAMA_CURL=1 make
-        ```
-
-        Step 3: Run inference through the main binary.
-        ```
-        ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
-        ```
-        or
-        ```
-        ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
-        ```
         """
     )
     readme_path = Path(outdir)/"README.md"
     card.save(readme_path)
+
+
+    # Build the quantized-model download list
+    quant_list = f"""
+## ✅ Quantized Models Download List
+**✨ Recommended for CPU:** `Q4_K_M` | **⚡ Recommended for ARM CPU:** `Q4_0` | **🏆 Best Quality:** `Q8_0`
+
+| 🚀 Download | 🔢 Type | 📝 Notes |
+|:---------|:-----|:------|
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q2_k.gguf) | Q2_K | Basic quantization |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q3_k_s.gguf) | Q3_K_S | Small size |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q3_k_m.gguf) | Q3_K_M | Balanced quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q3_k_l.gguf) | Q3_K_L | Better quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q4_0.gguf) | Q4_0 | Fast on ARM |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q4_k_s.gguf) | Q4_K_S | Fast, recommended |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q4_k_m.gguf) | Q4_K_M ⭐ | Best balance |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q5_0.gguf) | Q5_0 | Good quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q5_k_s.gguf) | Q5_K_S | Balanced |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q5_k_m.gguf) | Q5_K_M | High quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q6_k.gguf) | Q6_K 🏆 | Very good quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-q8_0.gguf) | Q8_0 ⚡ | Fast, best quality |
+| [Download](https://huggingface.co/{new_repo_id}/resolve/main/{model_name.lower()}-f16.gguf) | F16 | Maximum accuracy |
+
+💡 **Tip:** Use `F16` for maximum precision when quality is critical
+"""
 
+    # Update the README (using ModelCard)
+    card.text += quant_list
+    readme_path = Path(outdir)/"README.md"
+    card.save(readme_path)
+
     if split_model:
         split_upload_model(str(quantized_gguf_path), outdir, new_repo_id, oauth_token, split_max_tensors, split_max_size)
     else:
````
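The card-building pattern in this hunk can be exercised outside the Space. The following is a minimal illustrative sketch, not the commit's code verbatim: `build_readme` is a hypothetical wrapper, and it assumes `huggingface_hub` is installed and that the caller passes the same values `process_model` has in scope.

```python
from pathlib import Path
from textwrap import dedent

from huggingface_hub import ModelCard


def build_readme(card: ModelCard, model_id: str, new_repo_id: str, outdir: str) -> Path:
    # Guard against a card with no tag list before appending, as the hunk does.
    if card.data.tags is None:
        card.data.tags = []
    card.data.tags.append("llama-cpp")
    card.data.base_model = model_id

    # dedent() strips the common leading whitespace from the triple-quoted
    # body so the README renders without stray indentation.
    card.text = dedent(
        f"""
        # {new_repo_id}
        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id})
        Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
        """
    )
    readme_path = Path(outdir) / "README.md"
    card.save(readme_path)
    return readme_path
```

In the Space itself the card would typically start from the source repo's card (e.g. `ModelCard.load(model_id)`), and the quant table is then appended to `card.text` before a second `card.save`.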
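The commit writes the download table as one literal f-string. Purely as an illustration of an alternative, and not what the commit does, the rows could be generated from a list of (type, note) pairs so that the `{model_name.lower()}-<type>.gguf` URL convention lives in one place:

```python
# Hypothetical helper: derives the Markdown table the commit hard-codes.
QUANTS = [
    ("Q2_K", "Basic quantization"),
    ("Q3_K_S", "Small size"),
    ("Q3_K_M", "Balanced quality"),
    ("Q3_K_L", "Better quality"),
    ("Q4_0", "Fast on ARM"),
    ("Q4_K_S", "Fast, recommended"),
    ("Q4_K_M", "Best balance"),
    ("Q5_0", "Good quality"),
    ("Q5_K_S", "Balanced"),
    ("Q5_K_M", "High quality"),
    ("Q6_K", "Very good quality"),
    ("Q8_0", "Fast, best quality"),
    ("F16", "Maximum accuracy"),
]


def quant_table(new_repo_id: str, model_name: str) -> str:
    base = f"https://huggingface.co/{new_repo_id}/resolve/main"
    rows = [
        "| 🚀 Download | 🔢 Type | 📝 Notes |",
        "|:---------|:-----|:------|",
    ]
    for quant, note in QUANTS:
        # File names follow the commit's convention: {model_name.lower()}-{type}.gguf
        url = f"{base}/{model_name.lower()}-{quant.lower()}.gguf"
        rows.append(f"| [Download]({url}) | {quant} | {note} |")
    return "\n".join(rows)
```

Either way the README gets the same table; the literal string in the commit trades extensibility for being able to eyeball exactly what lands on the model page.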
```diff
@@ -424,7 +414,7 @@ with gr.Blocks(css=css) as demo:
     )
 
 def restart_space():
-    HfApi().restart_space(repo_id="
+    HfApi().restart_space(repo_id="matrixportal/all-gguf-same-where", token=HF_TOKEN, factory_reboot=True)
 
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=21600)
```
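The second hunk restores the scheduled keep-alive with the fork's own repo id. As a minimal standalone sketch of the pattern, assuming `apscheduler` is installed and `HF_TOKEN` arrives through the environment as a Space secret:

```python
import os

from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

# In a Space this would be configured as a repository secret.
HF_TOKEN = os.environ.get("HF_TOKEN")


def restart_space():
    # factory_reboot=True rebuilds the Space from scratch rather than
    # simply restarting the running container.
    HfApi().restart_space(
        repo_id="matrixportal/all-gguf-same-where",
        token=HF_TOKEN,
        factory_reboot=True,
    )


scheduler = BackgroundScheduler()
# 21600 seconds = one restart every 6 hours, matching the diff.
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()
```

The periodic factory reboot presumably exists to clear state accumulated by conversion runs; the trade-off is that any job in flight at the 6-hour mark is lost.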