{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_load_model_from_file: using device Metal (Apple M1) - 17592186044388 MiB free\n",
"llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from ../models/Llama-3-ELYZA-JP-8B-q4_k_m.gguf (version GGUF V3 (latest))\n",
"llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
"llama_model_loader: - kv 0: general.architecture str = llama\n",
"llama_model_loader: - kv 1: general.name str = Llama-3-8B-optimal-merged-stage2\n",
"llama_model_loader: - kv 2: llama.block_count u32 = 32\n",
"llama_model_loader: - kv 3: llama.context_length u32 = 8192\n",
"llama_model_loader: - kv 4: llama.embedding_length u32 = 4096\n",
"llama_model_loader: - kv 5: llama.feed_forward_length u32 = 14336\n",
"llama_model_loader: - kv 6: llama.attention.head_count u32 = 32\n",
"llama_model_loader: - kv 7: llama.attention.head_count_kv u32 = 8\n",
"llama_model_loader: - kv 8: llama.rope.freq_base f32 = 500000.000000\n",
"llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n",
"llama_model_loader: - kv 10: general.file_type u32 = 15\n",
"llama_model_loader: - kv 11: llama.vocab_size u32 = 128256\n",
"llama_model_loader: - kv 12: llama.rope.dimension_count u32 = 128\n",
"llama_model_loader: - kv 13: tokenizer.ggml.model str = gpt2\n",
"llama_model_loader: - kv 14: tokenizer.ggml.pre str = llama-bpe\n",
"llama_model_loader: - kv 15: tokenizer.ggml.tokens arr[str,128256] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n",
"llama_model_loader: - kv 16: tokenizer.ggml.token_type arr[i32,128256] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n",
"llama_model_loader: - kv 17: tokenizer.ggml.merges arr[str,280147] = [\"Ġ Ġ\", \"Ġ ĠĠĠ\", \"ĠĠ ĠĠ\", \"...\n",
"llama_model_loader: - kv 18: tokenizer.ggml.bos_token_id u32 = 128000\n",
"llama_model_loader: - kv 19: tokenizer.ggml.eos_token_id u32 = 128009\n",
"llama_model_loader: - kv 20: tokenizer.chat_template str = {% set loop_messages = messages %}{% ...\n",
"llama_model_loader: - kv 21: general.quantization_version u32 = 2\n",
"llama_model_loader: - type f32: 65 tensors\n",
"llama_model_loader: - type q4_K: 193 tensors\n",
"llama_model_loader: - type q6_K: 33 tensors\n",
"llm_load_vocab: control token: 128255 '<|reserved_special_token_250|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128253 '<|reserved_special_token_248|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128251 '<|reserved_special_token_246|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128249 '<|reserved_special_token_244|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128248 '<|reserved_special_token_243|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128247 '<|reserved_special_token_242|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128245 '<|reserved_special_token_240|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128244 '<|reserved_special_token_239|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128242 '<|reserved_special_token_237|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128241 '<|reserved_special_token_236|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128240 '<|reserved_special_token_235|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128237 '<|reserved_special_token_232|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128235 '<|reserved_special_token_230|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128232 '<|reserved_special_token_227|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128231 '<|reserved_special_token_226|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128226 '<|reserved_special_token_221|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128224 '<|reserved_special_token_219|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128223 '<|reserved_special_token_218|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128221 '<|reserved_special_token_216|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128220 '<|reserved_special_token_215|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128218 '<|reserved_special_token_213|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128216 '<|reserved_special_token_211|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128215 '<|reserved_special_token_210|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128214 '<|reserved_special_token_209|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128213 '<|reserved_special_token_208|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128212 '<|reserved_special_token_207|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128210 '<|reserved_special_token_205|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128208 '<|reserved_special_token_203|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128207 '<|reserved_special_token_202|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128206 '<|reserved_special_token_201|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128205 '<|reserved_special_token_200|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128204 '<|reserved_special_token_199|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128201 '<|reserved_special_token_196|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128199 '<|reserved_special_token_194|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128194 '<|reserved_special_token_189|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128192 '<|reserved_special_token_187|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128191 '<|reserved_special_token_186|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128188 '<|reserved_special_token_183|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128187 '<|reserved_special_token_182|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128185 '<|reserved_special_token_180|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128184 '<|reserved_special_token_179|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128182 '<|reserved_special_token_177|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128181 '<|reserved_special_token_176|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128180 '<|reserved_special_token_175|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128175 '<|reserved_special_token_170|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128174 '<|reserved_special_token_169|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128173 '<|reserved_special_token_168|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128172 '<|reserved_special_token_167|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128171 '<|reserved_special_token_166|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128170 '<|reserved_special_token_165|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128169 '<|reserved_special_token_164|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128166 '<|reserved_special_token_161|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128164 '<|reserved_special_token_159|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128163 '<|reserved_special_token_158|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128157 '<|reserved_special_token_152|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128156 '<|reserved_special_token_151|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128154 '<|reserved_special_token_149|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128153 '<|reserved_special_token_148|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128151 '<|reserved_special_token_146|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128149 '<|reserved_special_token_144|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128148 '<|reserved_special_token_143|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128147 '<|reserved_special_token_142|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128144 '<|reserved_special_token_139|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128141 '<|reserved_special_token_136|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128139 '<|reserved_special_token_134|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128138 '<|reserved_special_token_133|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128137 '<|reserved_special_token_132|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128130 '<|reserved_special_token_125|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128127 '<|reserved_special_token_122|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128125 '<|reserved_special_token_120|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128124 '<|reserved_special_token_119|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128123 '<|reserved_special_token_118|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128122 '<|reserved_special_token_117|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128121 '<|reserved_special_token_116|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128120 '<|reserved_special_token_115|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128119 '<|reserved_special_token_114|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128118 '<|reserved_special_token_113|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128117 '<|reserved_special_token_112|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128116 '<|reserved_special_token_111|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128113 '<|reserved_special_token_108|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128112 '<|reserved_special_token_107|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128111 '<|reserved_special_token_106|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128110 '<|reserved_special_token_105|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128108 '<|reserved_special_token_103|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128107 '<|reserved_special_token_102|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128104 '<|reserved_special_token_99|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128103 '<|reserved_special_token_98|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128102 '<|reserved_special_token_97|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128101 '<|reserved_special_token_96|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128100 '<|reserved_special_token_95|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128097 '<|reserved_special_token_92|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128094 '<|reserved_special_token_89|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128093 '<|reserved_special_token_88|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128091 '<|reserved_special_token_86|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128090 '<|reserved_special_token_85|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128087 '<|reserved_special_token_82|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128086 '<|reserved_special_token_81|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128084 '<|reserved_special_token_79|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128082 '<|reserved_special_token_77|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128077 '<|reserved_special_token_72|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128074 '<|reserved_special_token_69|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128073 '<|reserved_special_token_68|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128070 '<|reserved_special_token_65|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128067 '<|reserved_special_token_62|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128066 '<|reserved_special_token_61|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128064 '<|reserved_special_token_59|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128061 '<|reserved_special_token_56|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128059 '<|reserved_special_token_54|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128058 '<|reserved_special_token_53|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128057 '<|reserved_special_token_52|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128051 '<|reserved_special_token_46|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128042 '<|reserved_special_token_37|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128041 '<|reserved_special_token_36|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128040 '<|reserved_special_token_35|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128039 '<|reserved_special_token_34|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128035 '<|reserved_special_token_30|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128034 '<|reserved_special_token_29|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128032 '<|reserved_special_token_27|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128031 '<|reserved_special_token_26|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128030 '<|reserved_special_token_25|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128029 '<|reserved_special_token_24|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128027 '<|reserved_special_token_22|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128026 '<|reserved_special_token_21|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128025 '<|reserved_special_token_20|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128023 '<|reserved_special_token_18|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128022 '<|reserved_special_token_17|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128021 '<|reserved_special_token_16|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128019 '<|reserved_special_token_14|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128017 '<|reserved_special_token_12|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128014 '<|reserved_special_token_9|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128013 '<|reserved_special_token_8|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128012 '<|reserved_special_token_7|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128011 '<|reserved_special_token_6|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128010 '<|reserved_special_token_5|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128006 '<|start_header_id|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128005 '<|reserved_special_token_3|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128003 '<|reserved_special_token_1|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128002 '<|reserved_special_token_0|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128000 '<|begin_of_text|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128038 '<|reserved_special_token_33|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128060 '<|reserved_special_token_55|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128043 '<|reserved_special_token_38|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128007 '<|end_header_id|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128062 '<|reserved_special_token_57|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128168 '<|reserved_special_token_163|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128159 '<|reserved_special_token_154|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128162 '<|reserved_special_token_157|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128054 '<|reserved_special_token_49|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128047 '<|reserved_special_token_42|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128053 '<|reserved_special_token_48|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128227 '<|reserved_special_token_222|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128095 '<|reserved_special_token_90|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128150 '<|reserved_special_token_145|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128081 '<|reserved_special_token_76|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128079 '<|reserved_special_token_74|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128099 '<|reserved_special_token_94|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128250 '<|reserved_special_token_245|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128176 '<|reserved_special_token_171|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128068 '<|reserved_special_token_63|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128132 '<|reserved_special_token_127|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128158 '<|reserved_special_token_153|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128161 '<|reserved_special_token_156|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128131 '<|reserved_special_token_126|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128246 '<|reserved_special_token_241|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128254 '<|reserved_special_token_249|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128033 '<|reserved_special_token_28|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128145 '<|reserved_special_token_140|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128178 '<|reserved_special_token_173|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128219 '<|reserved_special_token_214|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128072 '<|reserved_special_token_67|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128238 '<|reserved_special_token_233|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128048 '<|reserved_special_token_43|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128065 '<|reserved_special_token_60|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128146 '<|reserved_special_token_141|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128198 '<|reserved_special_token_193|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128055 '<|reserved_special_token_50|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128143 '<|reserved_special_token_138|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128140 '<|reserved_special_token_135|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128020 '<|reserved_special_token_15|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128036 '<|reserved_special_token_31|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128129 '<|reserved_special_token_124|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128098 '<|reserved_special_token_93|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128209 '<|reserved_special_token_204|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128186 '<|reserved_special_token_181|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128222 '<|reserved_special_token_217|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128126 '<|reserved_special_token_121|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128004 '<|reserved_special_token_2|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128075 '<|reserved_special_token_70|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128160 '<|reserved_special_token_155|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128069 '<|reserved_special_token_64|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128109 '<|reserved_special_token_104|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128183 '<|reserved_special_token_178|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128092 '<|reserved_special_token_87|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128106 '<|reserved_special_token_101|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128096 '<|reserved_special_token_91|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128135 '<|reserved_special_token_130|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128190 '<|reserved_special_token_185|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128196 '<|reserved_special_token_191|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128045 '<|reserved_special_token_40|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128085 '<|reserved_special_token_80|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128189 '<|reserved_special_token_184|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128133 '<|reserved_special_token_128|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128089 '<|reserved_special_token_84|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128155 '<|reserved_special_token_150|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128001 '<|end_of_text|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128046 '<|reserved_special_token_41|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128028 '<|reserved_special_token_23|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128252 '<|reserved_special_token_247|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128179 '<|reserved_special_token_174|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128063 '<|reserved_special_token_58|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128177 '<|reserved_special_token_172|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128230 '<|reserved_special_token_225|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128076 '<|reserved_special_token_71|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128078 '<|reserved_special_token_73|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128228 '<|reserved_special_token_223|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128193 '<|reserved_special_token_188|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128044 '<|reserved_special_token_39|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128080 '<|reserved_special_token_75|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128136 '<|reserved_special_token_131|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128128 '<|reserved_special_token_123|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128115 '<|reserved_special_token_110|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128050 '<|reserved_special_token_45|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128217 '<|reserved_special_token_212|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128105 '<|reserved_special_token_100|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128088 '<|reserved_special_token_83|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128200 '<|reserved_special_token_195|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128056 '<|reserved_special_token_51|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128016 '<|reserved_special_token_11|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128167 '<|reserved_special_token_162|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128202 '<|reserved_special_token_197|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128037 '<|reserved_special_token_32|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128197 '<|reserved_special_token_192|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128233 '<|reserved_special_token_228|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128142 '<|reserved_special_token_137|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128165 '<|reserved_special_token_160|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128211 '<|reserved_special_token_206|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128134 '<|reserved_special_token_129|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128229 '<|reserved_special_token_224|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128236 '<|reserved_special_token_231|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128052 '<|reserved_special_token_47|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128225 '<|reserved_special_token_220|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128203 '<|reserved_special_token_198|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128015 '<|reserved_special_token_10|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128008 '<|reserved_special_token_4|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128195 '<|reserved_special_token_190|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128018 '<|reserved_special_token_13|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128083 '<|reserved_special_token_78|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128071 '<|reserved_special_token_66|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128024 '<|reserved_special_token_19|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128239 '<|reserved_special_token_234|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128152 '<|reserved_special_token_147|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128049 '<|reserved_special_token_44|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128243 '<|reserved_special_token_238|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128114 '<|reserved_special_token_109|>' is not marked as EOG\n",
"llm_load_vocab: control token: 128234 '<|reserved_special_token_229|>' is not marked as EOG\n",
"llm_load_vocab: special tokens cache size = 256\n",
"llm_load_vocab: token to piece cache size = 0.8000 MB\n",
"llm_load_print_meta: format = GGUF V3 (latest)\n",
"llm_load_print_meta: arch = llama\n",
"llm_load_print_meta: vocab type = BPE\n",
"llm_load_print_meta: n_vocab = 128256\n",
"llm_load_print_meta: n_merges = 280147\n",
"llm_load_print_meta: vocab_only = 0\n",
"llm_load_print_meta: n_ctx_train = 8192\n",
"llm_load_print_meta: n_embd = 4096\n",
"llm_load_print_meta: n_layer = 32\n",
"llm_load_print_meta: n_head = 32\n",
"llm_load_print_meta: n_head_kv = 8\n",
"llm_load_print_meta: n_rot = 128\n",
"llm_load_print_meta: n_swa = 0\n",
"llm_load_print_meta: n_embd_head_k = 128\n",
"llm_load_print_meta: n_embd_head_v = 128\n",
"llm_load_print_meta: n_gqa = 4\n",
"llm_load_print_meta: n_embd_k_gqa = 1024\n",
"llm_load_print_meta: n_embd_v_gqa = 1024\n",
"llm_load_print_meta: f_norm_eps = 0.0e+00\n",
"llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n",
"llm_load_print_meta: f_clamp_kqv = 0.0e+00\n",
"llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n",
"llm_load_print_meta: f_logit_scale = 0.0e+00\n",
"llm_load_print_meta: n_ff = 14336\n",
"llm_load_print_meta: n_expert = 0\n",
"llm_load_print_meta: n_expert_used = 0\n",
"llm_load_print_meta: causal attn = 1\n",
"llm_load_print_meta: pooling type = 0\n",
"llm_load_print_meta: rope type = 0\n",
"llm_load_print_meta: rope scaling = linear\n",
"llm_load_print_meta: freq_base_train = 500000.0\n",
"llm_load_print_meta: freq_scale_train = 1\n",
"llm_load_print_meta: n_ctx_orig_yarn = 8192\n",
"llm_load_print_meta: rope_finetuned = unknown\n",
"llm_load_print_meta: ssm_d_conv = 0\n",
"llm_load_print_meta: ssm_d_inner = 0\n",
"llm_load_print_meta: ssm_d_state = 0\n",
"llm_load_print_meta: ssm_dt_rank = 0\n",
"llm_load_print_meta: ssm_dt_b_c_rms = 0\n",
"llm_load_print_meta: model type = 8B\n",
"llm_load_print_meta: model ftype = Q4_K - Medium\n",
"llm_load_print_meta: model params = 8.03 B\n",
"llm_load_print_meta: model size = 4.58 GiB (4.89 BPW) \n",
"llm_load_print_meta: general.name = Llama-3-8B-optimal-merged-stage2\n",
"llm_load_print_meta: BOS token = 128000 '<|begin_of_text|>'\n",
"llm_load_print_meta: EOS token = 128009 '<|eot_id|>'\n",
"llm_load_print_meta: EOT token = 128009 '<|eot_id|>'\n",
"llm_load_print_meta: LF token = 128 'Ä'\n",
"llm_load_print_meta: EOG token = 128009 '<|eot_id|>'\n",
"llm_load_print_meta: max token length = 256\n",
"llm_load_tensors: tensor 'token_embd.weight' (q4_K) (and 0 others) cannot be used with preferred buffer type CPU_AARCH64, using CPU instead\n",
"ggml_backend_metal_log_allocated_size: allocated buffer, size = 4096.00 MiB, ( 9584.69 / 5461.34)\n",
"ggml_backend_metal_log_allocated_size: warning: current allocated size is greater than the recommended max working set size\n",
"\n",
"ggml_backend_metal_log_allocated_size: allocated buffer, size = 1000.31 MiB, (10585.00 / 5461.34)\n",
"ggml_backend_metal_log_allocated_size: warning: current allocated size is greater than the recommended max working set size\n",
"llm_load_tensors: offloading 32 repeating layers to GPU\n",
"llm_load_tensors: offloading output layer to GPU\n",
"llm_load_tensors: offloaded 33/33 layers to GPU\n",
"llm_load_tensors: Metal_Mapped model buffer size = 4685.31 MiB\n",
"llm_load_tensors: CPU_Mapped model buffer size = 281.81 MiB\n",
".......................................................................................\n",
"llama_new_context_with_model: n_seq_max = 1\n",
"llama_new_context_with_model: n_ctx = 512\n",
"llama_new_context_with_model: n_ctx_per_seq = 512\n",
"llama_new_context_with_model: n_batch = 128\n",
"llama_new_context_with_model: n_ubatch = 128\n",
"llama_new_context_with_model: flash_attn = 0\n",
"llama_new_context_with_model: freq_base = 500000.0\n",
"llama_new_context_with_model: freq_scale = 1\n",
"llama_new_context_with_model: n_ctx_per_seq (512) < n_ctx_train (8192) -- the full capacity of the model will not be utilized\n",
"ggml_metal_init: allocating\n",
"ggml_metal_init: found device: Apple M1\n",
"ggml_metal_init: picking default device: Apple M1\n",
"ggml_metal_init: using embedded metal library\n",
"ggml_metal_init: GPU name: Apple M1\n",
"ggml_metal_init: GPU family: MTLGPUFamilyApple7 (1007)\n",
"ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003)\n",
"ggml_metal_init: GPU family: MTLGPUFamilyMetal3 (5001)\n",
"ggml_metal_init: simdgroup reduction = true\n",
"ggml_metal_init: simdgroup matrix mul. = true\n",
"ggml_metal_init: has bfloat = true\n",
"ggml_metal_init: use bfloat = false\n",
"ggml_metal_init: hasUnifiedMemory = true\n",
"ggml_metal_init: recommendedMaxWorkingSetSize = 5726.63 MB\n",
"ggml_metal_init: loaded kernel_add 0x10487d590 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_add_row 0x104f3a120 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_sub 0x1051058f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_sub_row 0x104f9fcc0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul 0x104f9ff20 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_row 0x104fa0500 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_div 0x10487d7f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_div_row 0x10487ddd0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_repeat_f32 0x10487e030 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_repeat_f16 0x104fa0760 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_repeat_i32 0x10487e290 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_repeat_i16 0x10487e570 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_scale 0x10487ef10 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_scale_4 0x10487f590 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_clamp 0x104fa0d30 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_tanh 0x104fa1210 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_relu 0x104fa16f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_sigmoid 0x1027cf550 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_gelu 0x1027cfb00 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_gelu_4 0x10511da30 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_gelu_quick 0x104fa1bd0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_gelu_quick_4 0x10511df10 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_silu 0x10511e3f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_silu_4 0x1027d05f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_soft_max_f16 0x104fa1e30 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_soft_max_f16_4 0x104fa2090 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_soft_max_f32 0x10487fde0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_soft_max_f32_4 0x10511e8d0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_diag_mask_inf 0x10511f1a0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_diag_mask_inf_8 0x104fa22f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_f32 0x104fa2550 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_f16 0x104880720 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_get_rows_bf16 (not supported)\n",
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x105104080 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x104880cb0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q5_0 0x1027d12a0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q5_1 0x1027d1b90 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q8_0 0x1048815d0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x1027d24e0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x1027d2e00 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x104881f30 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x1048825d0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x104fa27b0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq2_xxs 0x1027d34e0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq2_xs 0x1048829e0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq3_xxs 0x105120270 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq3_s 0x1051204d0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq2_s 0x104fa2a10 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq1_s 0x1027d3740 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq1_m 0x104fa2c70 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq4_nl 0x104fa2ed0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_iq4_xs 0x105120d20 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_get_rows_i32 0x104fa3130 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_rms_norm 0x104883500 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_group_norm 0x104fa3390 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_norm 0x104fa35f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_ssm_conv_f32 0x1051209e0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_ssm_scan_f32 0x104884090 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_f32_f32 0x1048842f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_mul_mv_bf16_f32 (not supported)\n",
"ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row (not supported)\n",
"ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4 (not supported)\n",
"ggml_metal_init: skipping kernel_mul_mv_bf16_bf16 (not supported)\n",
"ggml_metal_init: loaded kernel_mul_mv_f16_f32 0x1048847b0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_f16_f32_1row 0x104885180 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_f16_f32_l4 0x104fa3850 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_f16_f16 0x104885ac0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q4_0_f32 0x104886480 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q4_1_f32 0x1027d3e70 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q5_0_f32 0x104fa3b30 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q5_1_f32 0x104886f70 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q8_0_f32 0x104fa3d90 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q2_K_f32 0x1051223c0 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q3_K_f32 0x1051227c0 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q4_K_f32 0x105123850 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q5_K_f32 0x1027d40d0 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_q6_K_f32 0x1027d4330 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq2_xxs_f32 0x104fa45c0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq2_xs_f32 0x104887b70 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq3_xxs_f32 0x104888900 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq3_s_f32 0x104fa4820 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq2_s_f32 0x1027d4870 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq1_s_f32 0x104889360 | th_max = 448 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq1_m_f32 0x1027d5710 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq4_nl_f32 0x104fa4dc0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_iq4_xs_f32 0x104889770 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_f32_f32 0x10488a040 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_f16_f32 0x10484e1a0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32 (not supported)\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q4_0_f32 0x1027d5970 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q4_1_f32 0x1027d60c0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q5_0_f32 0x104889c40 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q5_1_f32 0x1027d68f0 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q8_0_f32 0x10487ced0 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q2_K_f32 0x10487d2b0 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q3_K_f32 0x10488b2f0 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q4_K_f32 0x10488bce0 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q5_K_f32 0x10488bf80 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_q6_K_f32 0x1027d70c0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq2_xxs_f32 0x1027d7890 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq2_xs_f32 0x10488d050 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq3_xxs_f32 0x10488da40 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq3_s_f32 0x10488dce0 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq2_s_f32 0x10488e560 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq1_s_f32 0x10488f760 | th_max = 448 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq1_m_f32 0x10488fe30 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq4_nl_f32 0x1051094b0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mv_id_iq4_xs_f32 0x104fa5e70 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_f32_f32 0x104891150 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x104fa63b0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: skipping kernel_mul_mm_bf16_f32 (not supported)\n",
"ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x1051197d0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x104fa6eb0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q5_0_f32 0x105124b10 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q5_1_f32 0x104fa77d0 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q8_0_f32 0x1051258c0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x104fa8150 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x105125e20 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x104891680 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x1048918e0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x104fa8aa0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq2_xxs_f32 0x104892440 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq2_xs_f32 0x104892d10 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq3_xxs_f32 0x104fa93f0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq3_s_f32 0x104faa460 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq2_s_f32 0x104fa6b70 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq1_s_f32 0x104893ea0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq1_m_f32 0x104894ad0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq4_nl_f32 0x104895370 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_iq4_xs_f32 0x104faaea0 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_f32_f32 0x1027d7f80 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_f16_f32 0x104895940 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32 (not supported)\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q4_0_f32 0x1027d8990 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q4_1_f32 0x104fab100 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q5_0_f32 0x104896580 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q5_1_f32 0x104896d30 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q8_0_f32 0x104fac3f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q2_K_f32 0x104897390 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q3_K_f32 0x105126820 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q4_K_f32 0x104897f40 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q5_K_f32 0x104fad540 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_q6_K_f32 0x105127c70 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq2_xxs_f32 0x105127ed0 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq2_xs_f32 0x104fae170 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq3_xxs_f32 0x1048988a0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq3_s_f32 0x104899420 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq2_s_f32 0x105128790 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq1_s_f32 0x104899680 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq1_m_f32 0x104fae3d0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq4_nl_f32 0x104faecb0 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_mul_mm_id_iq4_xs_f32 0x104fafaf0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_rope_norm_f32 0x10489a0f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_rope_norm_f16 0x104fb0c20 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_rope_neox_f32 0x1027d9570 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_rope_neox_f16 0x104faf820 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_im2col_f16 0x104fb03a0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_im2col_f32 0x104fb1800 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_im2col_ext_f16 0x104fb1f40 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_im2col_ext_f32 0x104fb28f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_upscale_f32 0x104fb2b50 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_pad_f32 0x104fb3c00 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_timestep_embedding_f32 0x10489c290 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_arange_f32 0x10489b320 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_argsort_f32_i32_asc 0x1027da260 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_argsort_f32_i32_desc 0x10489d220 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_leaky_relu_f32 0x10489e810 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_f16_h64 0x104fb52e0 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_f16_h80 0x105128f30 | th_max = 640 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_f16_h96 0x10489eec0 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_f16_h112 0x10489f120 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_f16_h128 0x105129190 | th_max = 512 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_f16_h256 0x104fb4ab0 | th_max = 512 | th_width = 32\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64 (not supported)\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h80 (not supported)\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h96 (not supported)\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h112 (not supported)\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h128 (not supported)\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h256 (not supported)\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h64 0x1027daa80 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h80 0x10489f900 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h96 0x1027db170 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h112 0x104fb6600 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h128 0x1027db840 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h256 0x10512a0f0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h64 0x1048a0630 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h80 0x1048a1040 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h96 0x10512aa50 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h112 0x10512b1e0 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h128 0x10512bb40 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h256 0x10512c2a0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h64 0x10512cc30 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h80 0x1048a1db0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h96 0x10512d6f0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h112 0x104fb7020 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h128 0x10512dd90 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h256 0x10512dff0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h64 0x104fb5c10 | th_max = 576 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h80 0x1048a28b0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h96 0x104fb7830 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h112 0x10512e890 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h128 0x1048a3e00 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h256 0x133605d80 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h64 0x10512f640 | th_max = 768 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h80 0x1048a4590 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h96 0x105130060 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h112 0x1027dbb00 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h128 0x1051307c0 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h256 0x105130e40 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_f16_h128 0x105131500 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_vec_bf16_h128 (not supported)\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_0_h128 0x105131ba0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_1_h128 0x1048a4fb0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_0_h128 0x1048a5bf0 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_1_h128 0x104f95d60 | th_max = 832 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q8_0_h128 0x104f37380 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_f16_h256 0x1022d4890 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_flash_attn_ext_vec_bf16_h256 (not supported)\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_0_h256 0x1048a6210 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_1_h256 0x1027dc250 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_0_h256 0x104fb9080 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_1_h256 0x104fbafb0 | th_max = 704 | th_width = 32\n",
"ggml_metal_init: loaded kernel_flash_attn_ext_vec_q8_0_h256 0x104fbb9d0 | th_max = 896 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x104fba720 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x104fbcd90 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_cpy_f32_bf16 (not supported)\n",
"ggml_metal_init: loaded kernel_cpy_f16_f32 0x1048a7470 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x1051324e0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: skipping kernel_cpy_bf16_f32 (not supported)\n",
"ggml_metal_init: skipping kernel_cpy_bf16_bf16 (not supported)\n",
"ggml_metal_init: loaded kernel_cpy_f32_q8_0 0x1048a7e60 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f32_q4_0 0x1027dd5a0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f32_q4_1 0x1048a80c0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f32_q5_0 0x1048a8cb0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f32_q5_1 0x105132740 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cpy_f32_iq4_nl 0x1048a9740 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_concat 0x105133480 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_sqr 0x1048aadb0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_sqrt 0x1048ab930 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_sin 0x1051346e0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_cos 0x104fbe4f0 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_sum_rows 0x105135010 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_pool_2d_avg_f32 0x1027dd800 | th_max = 1024 | th_width = 32\n",
"ggml_metal_init: loaded kernel_pool_2d_max_f32 0x1048ab4d0 | th_max = 1024 | th_width = 32\n",
"llama_kv_cache_init: Metal KV buffer size = 64.00 MiB\n",
"llama_new_context_with_model: KV self size = 64.00 MiB, K (f16): 32.00 MiB, V (f16): 32.00 MiB\n",
"llama_new_context_with_model: CPU output buffer size = 0.49 MiB\n",
"llama_new_context_with_model: Metal compute buffer size = 64.62 MiB\n",
"llama_new_context_with_model: CPU compute buffer size = 2.25 MiB\n",
"llama_new_context_with_model: graph nodes = 1030\n",
"llama_new_context_with_model: graph splits = 2\n",
"AVX = 0 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | AMX_INT8 = 0 | FMA = 0 | NEON = 1 | SVE = 0 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | RISCV_VECT = 0 | WASM_SIMD = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | \n",
"Model metadata: {'general.quantization_version': '2', 'tokenizer.chat_template': \"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}{% endif %}\", 'tokenizer.ggml.eos_token_id': '128009', 'tokenizer.ggml.bos_token_id': '128000', 'tokenizer.ggml.pre': 'llama-bpe', 'tokenizer.ggml.model': 'gpt2', 'llama.vocab_size': '128256', 'llama.attention.head_count_kv': '8', 'llama.context_length': '8192', 'llama.attention.head_count': '32', 'general.file_type': '15', 'llama.feed_forward_length': '14336', 'llama.rope.dimension_count': '128', 'llama.rope.freq_base': '500000.000000', 'llama.embedding_length': '4096', 'general.architecture': 'llama', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'general.name': 'Llama-3-8B-optimal-merged-stage2', 'llama.block_count': '32'}\n",
"Available chat formats from metadata: chat_template.default\n"
]
}
],
"source": [
"from llama_cpp import Llama\n",
"\n",
"llm = Llama(\n",
" model_path=\"../models/Llama-3-ELYZA-JP-8B-q4_k_m.gguf\",\n",
" chat_format=\"llama-3\",\n",
" # n_ctx=1024,\n",
" n_batch=128,\n",
" n_gpu_layers=-1,\n",
")"
]
},
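  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The cell above loads the 4-bit (q4_k_m) GGUF build of Llama-3-ELYZA-JP-8B with `n_gpu_layers=-1`, offloading every layer to Metal; `chat_format=\"llama-3\"` matches the `tokenizer.chat_template` embedded in the model metadata. Because `n_ctx` is left commented out, the context window falls back to llama-cpp-python's default of 512 tokens, even though the metadata reports `llama.context_length = 8192`. Below is a minimal sketch of a more explicit load; the `n_ctx` and `verbose` values are illustrative assumptions, not what was run above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_cpp import Llama\n",
    "\n",
    "llm = Llama(\n",
    "    model_path=\"../models/Llama-3-ELYZA-JP-8B-q4_k_m.gguf\",\n",
    "    chat_format=\"llama-3\",\n",
    "    n_ctx=4096,  # defaults to 512 when omitted; this model supports up to 8192\n",
    "    n_batch=128,\n",
    "    n_gpu_layers=-1,  # offload all layers to Metal\n",
    "    verbose=False,  # silence the ggml_metal_init kernel log shown above\n",
    ")"
   ]
  },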
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Llama.generate: 58 prefix-match hit, remaining 271 prompt tokens to eval\n",
"llama_perf_context_print: load time = 7094.75 ms\n",
"llama_perf_context_print: prompt eval time = 0.00 ms / 271 tokens ( 0.00 ms per token, inf tokens per second)\n",
"llama_perf_context_print: eval time = 0.00 ms / 165 runs ( 0.00 ms per token, inf tokens per second)\n",
"llama_perf_context_print: total time = 25745.42 ms / 436 tokens\n"
]
}
],
"source": [
"response = llm.create_chat_completion(\n",
" messages=[\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"あなたは誠実で優秀な日本人のアシスタントです。特に指示が無い場合は、常に日本語で回答してください。\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"以下の文章を要約してください。\\\n",
" # 文章\\\n",
" クリアするまで脱出不可能、ゲームオーバーは本当の“死”を意味する──。謎の次世代MMO『ソードアート・オンライン(SAO)』の“真実”を知らずログインした約一万人のユーザーと共に、その過酷なデスバトルは幕を開けた。\\\n",
" SAOに参加した一人である主人公・キリトは、いち早くこのMMOの“真実”を受け入れる。そして、ゲームの舞台となる巨大浮遊城『アインクラッド』で、パーティを組まないソロプレイヤーとして頭角をあらわしていった。\\\n",
" クリア条件である最上階層到達を目指し、熾烈な冒険(クエスト)を単独で続けるキリトだったが、レイピアの名手・女流剣士アスナの強引な誘いによって彼女とコンビを組むことになってしまう。その出会いは、キリトに運命とも呼べる契機をもたらし……。果たして、キリトはこのゲームから抜け出すことができるのか。\",\n",
" },\n",
" ],\n",
" max_tokens=1024,\n",
")"
]
},
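  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`create_chat_completion` takes OpenAI-style `messages`. The system prompt says, roughly, \"You are a sincere and capable Japanese assistant; unless instructed otherwise, always answer in Japanese,\" and the user message asks for a summary of the Sword Art Online synopsis. In the stderr log, `58 prefix-match hit` means the KV cache reused 58 tokens from the previous call, and the `0.00 ms` / `inf tokens per second` figures are degenerate timer readings, not real throughput. The same call can also stream tokens instead of blocking until the reply is complete; a sketch, assuming the `llm` instance above is reused (the prompt here is illustrative):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stream = llm.create_chat_completion(\n",
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": \"あなたは誠実で優秀な日本人のアシスタントです。\"},\n",
    "        {\"role\": \"user\", \"content\": \"『ソードアート・オンライン』とはどんな作品ですか?一文で答えてください。\"},\n",
    "    ],\n",
    "    max_tokens=256,\n",
    "    stream=True,  # yields OpenAI-style chunks instead of one response dict\n",
    ")\n",
    "\n",
    "for chunk in stream:\n",
    "    delta = chunk[\"choices\"][0][\"delta\"]\n",
    "    if \"content\" in delta:  # the first chunk carries only the role\n",
    "        print(delta[\"content\"], end=\"\", flush=True)\n",
    "print()"
   ]
  },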
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"以下は文章の要約です。\n",
"\n",
"謎のMMO『ソードアート・オンライン(SAO)』に約1万人のユーザーがログインしたが、ゲームの真実を知らずに脱出不可能なデスバトルが始まった。主人公のキリトは早くに真実を受け入れ、巨大浮遊城『アインクラッド』でソロプレイヤーとして活躍する。クリア条件の最上階層到達を目指すキリトは、女流剣士アスナとコンビを組むことになり、運命の出会いを果たす。果たしてキリトはこのゲームから抜け出すことができるのか。\n"
]
}
],
"source": [
"print(response[\"choices\"][0][\"message\"][\"content\"])"
]
},
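  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The printed text is the model's Japanese summary of the synopsis, as requested. The full `response` dict follows the OpenAI chat-completion schema, so the remaining fields can be read the same way; a sketch, to be run after the cells above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "choice = response[\"choices\"][0]\n",
    "print(choice[\"finish_reason\"])  # \"stop\" when generation ended on <|eot_id|>\n",
    "print(response[\"usage\"])  # prompt_tokens / completion_tokens / total_tokens\n",
    "print(response[\"model\"])  # defaults to the GGUF path given at load time"
   ]
  },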
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'verbose': True,\n",
" '_stack': <contextlib.ExitStack at 0x105b60ad0>,\n",
" 'numa': 0,\n",
" 'model_path': '../models/Llama-3-ELYZA-JP-8B-q4_k_m.gguf',\n",
" 'model_params': <llama_cpp.llama_cpp.llama_model_params at 0x115e5ab50>,\n",
" '_rpc_servers': None,\n",
" 'tensor_split': None,\n",
" '_c_tensor_split': None,\n",
" 'kv_overrides': None,\n",
" 'n_batch': 128,\n",
" 'n_threads': 4,\n",
" 'n_threads_batch': 8,\n",
" '_seed': 1894574933,\n",
" 'context_params': <llama_cpp.llama_cpp.llama_context_params at 0x115e59dd0>,\n",
" 'last_n_tokens_size': 64,\n",
" 'cache': None,\n",
" 'lora_base': None,\n",
" 'lora_scale': 1.0,\n",
" 'lora_path': None,\n",
" 'spm_infill': False,\n",
" '_model': <llama_cpp._internals.LlamaModel at 0x105ad6690>,\n",
" 'tokenizer_': <llama_cpp.llama_tokenizer.LlamaTokenizer at 0x115e0e450>,\n",
" '_ctx': <llama_cpp._internals.LlamaContext at 0x105b5d1d0>,\n",
" '_batch': <llama_cpp._internals.LlamaBatch at 0x10475e9d0>,\n",
" '_lora_adapter': None,\n",
" 'chat_format': 'llama-3',\n",
" 'chat_handler': None,\n",
" '_chat_handlers': {'chat_template.default': <function llama_cpp.llama_chat_format.chat_formatter_to_chat_completion_handler.<locals>.chat_completion_handler(*, llama: 'llama.Llama', messages: 'List[llama_types.ChatCompletionRequestMessage]', functions: 'Optional[List[llama_types.ChatCompletionFunction]]' = None, function_call: 'Optional[llama_types.ChatCompletionRequestFunctionCall]' = None, tools: 'Optional[List[llama_types.ChatCompletionTool]]' = None, tool_choice: 'Optional[llama_types.ChatCompletionToolChoiceOption]' = None, temperature: 'float' = 0.2, top_p: 'float' = 0.95, top_k: 'int' = 40, min_p: 'float' = 0.05, typical_p: 'float' = 1.0, stream: 'bool' = False, stop: 'Optional[Union[str, List[str]]]' = [], seed: 'Optional[int]' = None, response_format: 'Optional[llama_types.ChatCompletionRequestResponseFormat]' = None, max_tokens: 'Optional[int]' = None, presence_penalty: 'float' = 0.0, frequency_penalty: 'float' = 0.0, repeat_penalty: 'float' = 1.1, tfs_z: 'float' = 1.0, mirostat_mode: 'int' = 0, mirostat_tau: 'float' = 5.0, mirostat_eta: 'float' = 0.1, model: 'Optional[str]' = None, logits_processor: 'Optional[llama.LogitsProcessorList]' = None, grammar: 'Optional[llama.LlamaGrammar]' = None, logit_bias: 'Optional[Dict[str, float]]' = None, logprobs: 'Optional[bool]' = None, top_logprobs: 'Optional[int]' = None, **kwargs) -> 'Union[llama_types.CreateChatCompletionResponse, Iterator[llama_types.CreateChatCompletionStreamResponse]]'>},\n",
" 'draft_model': None,\n",
" '_n_vocab': 128256,\n",
" '_n_ctx': 512,\n",
" '_token_nl': 128,\n",
" '_token_eos': 128009,\n",
" '_candidates': <llama_cpp._internals.LlamaTokenDataArray at 0x115ec1450>,\n",
" 'n_tokens': 494,\n",
" 'input_ids': array([128000, 128006, 9125, 128007, 271, 30591, 112568, 15682,\n",
" 124097, 103350, 16556, 104622, 106241, 26854, 102433, 107707,\n",
" 39880, 57207, 105335, 52414, 38641, 1811, 66378, 20230,\n",
" 64467, 20379, 29295, 43568, 16995, 126513, 5486, 40053,\n",
" 20230, 102433, 102158, 16556, 113925, 39926, 72315, 1811,\n",
" 128009, 128006, 882, 128007, 271, 88852, 16144, 83125,\n",
" 30512, 31634, 103664, 39926, 72315, 1811, 310, 674,\n",
" 112053, 310, 116381, 104612, 54926, 103296, 110645, 20834,\n",
" 16937, 88367, 5486, 114567, 90962, 112164, 11972, 15682,\n",
" 117475, 16144, 2118, 102625, 863, 30512, 115552, 54926,\n",
" 17424, 1811, 105037, 236, 16144, 33671, 101083, 31640,\n",
" 8195, 46, 44620, 102741, 65575, 39880, 84477, 9458,\n",
" 110191, 118372, 10110, 7934, 46, 7705, 36761, 16144,\n",
" 2118, 89151, 103350, 863, 30512, 53283, 121140, 77750,\n",
" 76171, 56051, 103664, 15120, 32307, 107707, 108152, 38248,\n",
" 114, 11972, 19732, 55999, 20230, 106116, 103188, 100845,\n",
" 115, 26854, 68408, 22398, 66953, 127764, 15682, 106633,\n",
" 125523, 107674, 1811, 310, 16998, 46, 20230, 110284,\n",
" 56051, 122485, 103195, 122768, 35417, 9458, 62903, 37823,\n",
" 20251, 15682, 116898, 43514, 103856, 47884, 51330, 8195,\n",
" 46, 16144, 2118, 89151, 103350, 863, 114475, 76622,\n",
" 17701, 104028, 124845, 5486, 114567, 16144, 107875, 55038,\n",
" 117282, 109098, 27384, 111179, 109739, 60174, 44620, 39880,\n",
" 76171, 107059, 105404, 36761, 16556, 5486, 80805, 117675,\n",
" 30512, 103214, 17129, 100604, 102741, 42634, 57326, 108748,\n",
" 104930, 11972, 103306, 103892, 64936, 30512, 30591, 33503,\n",
" 78183, 109768, 100472, 1811, 310, 116381, 104612, 77195,\n",
" 103195, 32335, 17905, 106090, 114050, 28037, 104067, 30512,\n",
" 30832, 64467, 15024, 5486, 102448, 122, 111101, 26854,\n",
" 112798, 117126, 10110, 29220, 76739, 71634, 7705, 30512,\n",
" 110904, 106063, 16556, 106307, 105784, 62903, 37823, 20251,\n",
" 103351, 29295, 5486, 108748, 70563, 39880, 16144, 13372,\n",
" 46034, 9458, 58850, 89753, 119063, 101559, 39880, 22398,\n",
" 96452, 16144, 104195, 73686, 26854, 45918, 246, 16995,\n",
" 113468, 109453, 19732, 109713, 91482, 30512, 103214, 104004,\n",
" 100909, 117084, 114732, 109807, 20834, 38093, 16995, 15682,\n",
" 5486, 62903, 37823, 20251, 20230, 103768, 51609, 107173,\n",
" 105324, 103854, 30369, 120273, 101513, 30512, 32977, 28713,\n",
" 124949, 127891, 28873, 28713, 39926, 5486, 62903, 37823,\n",
" 20251, 15682, 51330, 114567, 55031, 113487, 76622, 121406,\n",
" 105908, 108608, 104865, 1811, 128009, 128006, 78191, 128007,\n",
" 271, 88852, 15682, 83125, 16144, 31634, 103664, 38641,\n",
" 3490, 105037, 236, 16144, 8195, 46, 44620, 102741,\n",
" 65575, 39880, 84477, 9458, 110191, 118372, 10110, 7934,\n",
" 46, 7705, 36761, 20230, 103664, 16, 32307, 107707,\n",
" 108152, 38248, 114, 11972, 29295, 77750, 76171, 56051,\n",
" 29295, 5486, 114567, 16144, 89151, 103350, 30512, 53283,\n",
" 121140, 20230, 110645, 20834, 16937, 88367, 26854, 68408,\n",
" 22398, 66953, 127764, 29295, 27704, 117864, 1811, 122768,\n",
" 35417, 16144, 62903, 37823, 20251, 15682, 103856, 47884,\n",
" 20230, 89151, 103350, 114475, 76622, 125639, 5486, 109098,\n",
" 27384, 111179, 109739, 60174, 44620, 39880, 76171, 107059,\n",
" 105404, 36761, 16556, 102741, 42634, 57326, 108748, 104930,\n",
" 11972, 103306, 76706, 109526, 235, 54926, 1811, 29220,\n",
" 104612, 77195, 16144, 32335, 17905, 106090, 114050, 28037,\n",
" 104067, 30512, 30832, 64467, 17663, 62903, 37823, 20251,\n",
" 15682, 5486, 58850, 89753, 119063, 101559, 39880, 22398,\n",
" 96452, 19732, 109713, 91482, 30512, 103214, 104004, 100909,\n",
" 115717, 5486, 103768, 51609, 16144, 20834, 38093, 16995,\n",
" 30512, 28873, 28713, 17663, 1811, 28873, 28713, 39926,\n",
" 62903, 37823, 20251, 15682, 51330, 114567, 55031, 113487,\n",
" 76622, 121406, 105908, 108608, 104865, 1811, 127173, 104028,\n",
" 108044, 108323, 19732, 97518, 89046, 47000, 107441, 108086,\n",
" 5486, 105469, 103424, 102212, 23530, 15682, 17039, 1],\n",
" dtype=int32),\n",
" 'scores': array([[0., 0., 0., ..., 0., 0., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" ...,\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.],\n",
" [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),\n",
" '_mirostat_mu': c_float(10.0),\n",
" 'metadata': {'general.quantization_version': '2',\n",
" 'tokenizer.chat_template': \"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}{% endif %}\",\n",
" 'tokenizer.ggml.eos_token_id': '128009',\n",
" 'tokenizer.ggml.bos_token_id': '128000',\n",
" 'tokenizer.ggml.pre': 'llama-bpe',\n",
" 'tokenizer.ggml.model': 'gpt2',\n",
" 'llama.vocab_size': '128256',\n",
" 'llama.attention.head_count_kv': '8',\n",
" 'llama.context_length': '8192',\n",
" 'llama.attention.head_count': '32',\n",
" 'general.file_type': '15',\n",
" 'llama.feed_forward_length': '14336',\n",
" 'llama.rope.dimension_count': '128',\n",
" 'llama.rope.freq_base': '500000.000000',\n",
" 'llama.embedding_length': '4096',\n",
" 'general.architecture': 'llama',\n",
" 'llama.attention.layer_norm_rms_epsilon': '0.000010',\n",
" 'general.name': 'Llama-3-8B-optimal-merged-stage2',\n",
" 'llama.block_count': '32'},\n",
" '_sampler': <llama_cpp._internals.LlamaSampler at 0x1031ee450>}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vars(llm)"
]
},
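  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Two details in the `vars(llm)` dump are worth noting: `_n_ctx` is 512 (the library default, since `n_ctx` stayed commented out at load time) and `n_tokens` is 494, so this single exchange nearly filled the window. The public accessors expose the same information without reaching into private attributes; a short sketch, with expected values taken from the dump above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(llm.n_ctx())  # 512 - the context window actually in use\n",
    "print(llm.n_vocab())  # 128256\n",
    "print(llm.metadata[\"llama.context_length\"])  # '8192' - the trained context length"
   ]
  }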
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}