{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "llama_load_model_from_file: using device Metal (Apple M1) - 17592186044388 MiB free\n",
      "llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from ../models/Llama-3-ELYZA-JP-8B-q4_k_m.gguf (version GGUF V3 (latest))\n",
      "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
      "llama_model_loader: - kv   0:                       general.architecture str              = llama\n",
      "llama_model_loader: - kv   1:                               general.name str              = Llama-3-8B-optimal-merged-stage2\n",
      "llama_model_loader: - kv   2:                          llama.block_count u32              = 32\n",
      "llama_model_loader: - kv   3:                       llama.context_length u32              = 8192\n",
      "llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096\n",
      "llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336\n",
      "llama_model_loader: - kv   6:                 llama.attention.head_count u32              = 32\n",
      "llama_model_loader: - kv   7:              llama.attention.head_count_kv u32              = 8\n",
      "llama_model_loader: - kv   8:                       llama.rope.freq_base f32              = 500000.000000\n",
      "llama_model_loader: - kv   9:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010\n",
      "llama_model_loader: - kv  10:                          general.file_type u32              = 15\n",
      "llama_model_loader: - kv  11:                           llama.vocab_size u32              = 128256\n",
      "llama_model_loader: - kv  12:                 llama.rope.dimension_count u32              = 128\n",
      "llama_model_loader: - kv  13:                       tokenizer.ggml.model str              = gpt2\n",
      "llama_model_loader: - kv  14:                         tokenizer.ggml.pre str              = llama-bpe\n",
      "llama_model_loader: - kv  15:                      tokenizer.ggml.tokens arr[str,128256]  = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n",
      "llama_model_loader: - kv  16:                  tokenizer.ggml.token_type arr[i32,128256]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n",
      "llama_model_loader: - kv  17:                      tokenizer.ggml.merges arr[str,280147]  = [\"Ġ Ġ\", \"Ġ ĠĠĠ\", \"ĠĠ ĠĠ\", \"...\n",
      "llama_model_loader: - kv  18:                tokenizer.ggml.bos_token_id u32              = 128000\n",
      "llama_model_loader: - kv  19:                tokenizer.ggml.eos_token_id u32              = 128009\n",
      "llama_model_loader: - kv  20:                    tokenizer.chat_template str              = {% set loop_messages = messages %}{% ...\n",
      "llama_model_loader: - kv  21:               general.quantization_version u32              = 2\n",
      "llama_model_loader: - type  f32:   65 tensors\n",
      "llama_model_loader: - type q4_K:  193 tensors\n",
      "llama_model_loader: - type q6_K:   33 tensors\n",
      "llm_load_vocab: control token: 128255 '<|reserved_special_token_250|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128253 '<|reserved_special_token_248|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128251 '<|reserved_special_token_246|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128249 '<|reserved_special_token_244|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128248 '<|reserved_special_token_243|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128247 '<|reserved_special_token_242|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128245 '<|reserved_special_token_240|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128244 '<|reserved_special_token_239|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128242 '<|reserved_special_token_237|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128241 '<|reserved_special_token_236|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128240 '<|reserved_special_token_235|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128237 '<|reserved_special_token_232|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128235 '<|reserved_special_token_230|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128232 '<|reserved_special_token_227|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128231 '<|reserved_special_token_226|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128226 '<|reserved_special_token_221|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128224 '<|reserved_special_token_219|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128223 '<|reserved_special_token_218|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128221 '<|reserved_special_token_216|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128220 '<|reserved_special_token_215|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128218 '<|reserved_special_token_213|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128216 '<|reserved_special_token_211|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128215 '<|reserved_special_token_210|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128214 '<|reserved_special_token_209|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128213 '<|reserved_special_token_208|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128212 '<|reserved_special_token_207|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128210 '<|reserved_special_token_205|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128208 '<|reserved_special_token_203|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128207 '<|reserved_special_token_202|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128206 '<|reserved_special_token_201|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128205 '<|reserved_special_token_200|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128204 '<|reserved_special_token_199|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128201 '<|reserved_special_token_196|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128199 '<|reserved_special_token_194|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128194 '<|reserved_special_token_189|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128192 '<|reserved_special_token_187|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128191 '<|reserved_special_token_186|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128188 '<|reserved_special_token_183|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128187 '<|reserved_special_token_182|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128185 '<|reserved_special_token_180|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128184 '<|reserved_special_token_179|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128182 '<|reserved_special_token_177|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128181 '<|reserved_special_token_176|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128180 '<|reserved_special_token_175|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128175 '<|reserved_special_token_170|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128174 '<|reserved_special_token_169|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128173 '<|reserved_special_token_168|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128172 '<|reserved_special_token_167|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128171 '<|reserved_special_token_166|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128170 '<|reserved_special_token_165|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128169 '<|reserved_special_token_164|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128166 '<|reserved_special_token_161|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128164 '<|reserved_special_token_159|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128163 '<|reserved_special_token_158|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128157 '<|reserved_special_token_152|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128156 '<|reserved_special_token_151|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128154 '<|reserved_special_token_149|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128153 '<|reserved_special_token_148|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128151 '<|reserved_special_token_146|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128149 '<|reserved_special_token_144|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128148 '<|reserved_special_token_143|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128147 '<|reserved_special_token_142|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128144 '<|reserved_special_token_139|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128141 '<|reserved_special_token_136|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128139 '<|reserved_special_token_134|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128138 '<|reserved_special_token_133|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128137 '<|reserved_special_token_132|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128130 '<|reserved_special_token_125|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128127 '<|reserved_special_token_122|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128125 '<|reserved_special_token_120|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128124 '<|reserved_special_token_119|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128123 '<|reserved_special_token_118|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128122 '<|reserved_special_token_117|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128121 '<|reserved_special_token_116|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128120 '<|reserved_special_token_115|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128119 '<|reserved_special_token_114|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128118 '<|reserved_special_token_113|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128117 '<|reserved_special_token_112|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128116 '<|reserved_special_token_111|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128113 '<|reserved_special_token_108|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128112 '<|reserved_special_token_107|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128111 '<|reserved_special_token_106|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128110 '<|reserved_special_token_105|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128108 '<|reserved_special_token_103|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128107 '<|reserved_special_token_102|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128104 '<|reserved_special_token_99|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128103 '<|reserved_special_token_98|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128102 '<|reserved_special_token_97|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128101 '<|reserved_special_token_96|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128100 '<|reserved_special_token_95|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128097 '<|reserved_special_token_92|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128094 '<|reserved_special_token_89|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128093 '<|reserved_special_token_88|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128091 '<|reserved_special_token_86|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128090 '<|reserved_special_token_85|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128087 '<|reserved_special_token_82|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128086 '<|reserved_special_token_81|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128084 '<|reserved_special_token_79|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128082 '<|reserved_special_token_77|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128077 '<|reserved_special_token_72|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128074 '<|reserved_special_token_69|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128073 '<|reserved_special_token_68|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128070 '<|reserved_special_token_65|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128067 '<|reserved_special_token_62|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128066 '<|reserved_special_token_61|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128064 '<|reserved_special_token_59|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128061 '<|reserved_special_token_56|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128059 '<|reserved_special_token_54|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128058 '<|reserved_special_token_53|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128057 '<|reserved_special_token_52|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128051 '<|reserved_special_token_46|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128042 '<|reserved_special_token_37|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128041 '<|reserved_special_token_36|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128040 '<|reserved_special_token_35|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128039 '<|reserved_special_token_34|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128035 '<|reserved_special_token_30|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128034 '<|reserved_special_token_29|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128032 '<|reserved_special_token_27|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128031 '<|reserved_special_token_26|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128030 '<|reserved_special_token_25|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128029 '<|reserved_special_token_24|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128027 '<|reserved_special_token_22|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128026 '<|reserved_special_token_21|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128025 '<|reserved_special_token_20|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128023 '<|reserved_special_token_18|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128022 '<|reserved_special_token_17|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128021 '<|reserved_special_token_16|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128019 '<|reserved_special_token_14|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128017 '<|reserved_special_token_12|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128014 '<|reserved_special_token_9|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128013 '<|reserved_special_token_8|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128012 '<|reserved_special_token_7|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128011 '<|reserved_special_token_6|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128010 '<|reserved_special_token_5|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128006 '<|start_header_id|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128005 '<|reserved_special_token_3|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128003 '<|reserved_special_token_1|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128002 '<|reserved_special_token_0|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128000 '<|begin_of_text|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128038 '<|reserved_special_token_33|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128060 '<|reserved_special_token_55|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128043 '<|reserved_special_token_38|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128007 '<|end_header_id|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128062 '<|reserved_special_token_57|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128168 '<|reserved_special_token_163|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128159 '<|reserved_special_token_154|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128162 '<|reserved_special_token_157|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128054 '<|reserved_special_token_49|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128047 '<|reserved_special_token_42|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128053 '<|reserved_special_token_48|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128227 '<|reserved_special_token_222|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128095 '<|reserved_special_token_90|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128150 '<|reserved_special_token_145|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128081 '<|reserved_special_token_76|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128079 '<|reserved_special_token_74|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128099 '<|reserved_special_token_94|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128250 '<|reserved_special_token_245|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128176 '<|reserved_special_token_171|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128068 '<|reserved_special_token_63|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128132 '<|reserved_special_token_127|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128158 '<|reserved_special_token_153|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128161 '<|reserved_special_token_156|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128131 '<|reserved_special_token_126|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128246 '<|reserved_special_token_241|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128254 '<|reserved_special_token_249|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128033 '<|reserved_special_token_28|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128145 '<|reserved_special_token_140|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128178 '<|reserved_special_token_173|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128219 '<|reserved_special_token_214|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128072 '<|reserved_special_token_67|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128238 '<|reserved_special_token_233|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128048 '<|reserved_special_token_43|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128065 '<|reserved_special_token_60|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128146 '<|reserved_special_token_141|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128198 '<|reserved_special_token_193|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128055 '<|reserved_special_token_50|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128143 '<|reserved_special_token_138|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128140 '<|reserved_special_token_135|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128020 '<|reserved_special_token_15|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128036 '<|reserved_special_token_31|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128129 '<|reserved_special_token_124|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128098 '<|reserved_special_token_93|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128209 '<|reserved_special_token_204|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128186 '<|reserved_special_token_181|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128222 '<|reserved_special_token_217|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128126 '<|reserved_special_token_121|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128004 '<|reserved_special_token_2|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128075 '<|reserved_special_token_70|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128160 '<|reserved_special_token_155|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128069 '<|reserved_special_token_64|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128109 '<|reserved_special_token_104|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128183 '<|reserved_special_token_178|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128092 '<|reserved_special_token_87|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128106 '<|reserved_special_token_101|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128096 '<|reserved_special_token_91|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128135 '<|reserved_special_token_130|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128190 '<|reserved_special_token_185|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128196 '<|reserved_special_token_191|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128045 '<|reserved_special_token_40|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128085 '<|reserved_special_token_80|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128189 '<|reserved_special_token_184|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128133 '<|reserved_special_token_128|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128089 '<|reserved_special_token_84|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128155 '<|reserved_special_token_150|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128001 '<|end_of_text|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128046 '<|reserved_special_token_41|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128028 '<|reserved_special_token_23|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128252 '<|reserved_special_token_247|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128179 '<|reserved_special_token_174|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128063 '<|reserved_special_token_58|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128177 '<|reserved_special_token_172|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128230 '<|reserved_special_token_225|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128076 '<|reserved_special_token_71|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128078 '<|reserved_special_token_73|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128228 '<|reserved_special_token_223|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128193 '<|reserved_special_token_188|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128044 '<|reserved_special_token_39|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128080 '<|reserved_special_token_75|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128136 '<|reserved_special_token_131|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128128 '<|reserved_special_token_123|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128115 '<|reserved_special_token_110|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128050 '<|reserved_special_token_45|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128217 '<|reserved_special_token_212|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128105 '<|reserved_special_token_100|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128088 '<|reserved_special_token_83|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128200 '<|reserved_special_token_195|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128056 '<|reserved_special_token_51|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128016 '<|reserved_special_token_11|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128167 '<|reserved_special_token_162|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128202 '<|reserved_special_token_197|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128037 '<|reserved_special_token_32|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128197 '<|reserved_special_token_192|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128233 '<|reserved_special_token_228|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128142 '<|reserved_special_token_137|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128165 '<|reserved_special_token_160|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128211 '<|reserved_special_token_206|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128134 '<|reserved_special_token_129|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128229 '<|reserved_special_token_224|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128236 '<|reserved_special_token_231|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128052 '<|reserved_special_token_47|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128225 '<|reserved_special_token_220|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128203 '<|reserved_special_token_198|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128015 '<|reserved_special_token_10|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128008 '<|reserved_special_token_4|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128195 '<|reserved_special_token_190|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128018 '<|reserved_special_token_13|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128083 '<|reserved_special_token_78|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128071 '<|reserved_special_token_66|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128024 '<|reserved_special_token_19|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128239 '<|reserved_special_token_234|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128152 '<|reserved_special_token_147|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128049 '<|reserved_special_token_44|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128243 '<|reserved_special_token_238|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128114 '<|reserved_special_token_109|>' is not marked as EOG\n",
      "llm_load_vocab: control token: 128234 '<|reserved_special_token_229|>' is not marked as EOG\n",
      "llm_load_vocab: special tokens cache size = 256\n",
      "llm_load_vocab: token to piece cache size = 0.8000 MB\n",
      "llm_load_print_meta: format           = GGUF V3 (latest)\n",
      "llm_load_print_meta: arch             = llama\n",
      "llm_load_print_meta: vocab type       = BPE\n",
      "llm_load_print_meta: n_vocab          = 128256\n",
      "llm_load_print_meta: n_merges         = 280147\n",
      "llm_load_print_meta: vocab_only       = 0\n",
      "llm_load_print_meta: n_ctx_train      = 8192\n",
      "llm_load_print_meta: n_embd           = 4096\n",
      "llm_load_print_meta: n_layer          = 32\n",
      "llm_load_print_meta: n_head           = 32\n",
      "llm_load_print_meta: n_head_kv        = 8\n",
      "llm_load_print_meta: n_rot            = 128\n",
      "llm_load_print_meta: n_swa            = 0\n",
      "llm_load_print_meta: n_embd_head_k    = 128\n",
      "llm_load_print_meta: n_embd_head_v    = 128\n",
      "llm_load_print_meta: n_gqa            = 4\n",
      "llm_load_print_meta: n_embd_k_gqa     = 1024\n",
      "llm_load_print_meta: n_embd_v_gqa     = 1024\n",
      "llm_load_print_meta: f_norm_eps       = 0.0e+00\n",
      "llm_load_print_meta: f_norm_rms_eps   = 1.0e-05\n",
      "llm_load_print_meta: f_clamp_kqv      = 0.0e+00\n",
      "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n",
      "llm_load_print_meta: f_logit_scale    = 0.0e+00\n",
      "llm_load_print_meta: n_ff             = 14336\n",
      "llm_load_print_meta: n_expert         = 0\n",
      "llm_load_print_meta: n_expert_used    = 0\n",
      "llm_load_print_meta: causal attn      = 1\n",
      "llm_load_print_meta: pooling type     = 0\n",
      "llm_load_print_meta: rope type        = 0\n",
      "llm_load_print_meta: rope scaling     = linear\n",
      "llm_load_print_meta: freq_base_train  = 500000.0\n",
      "llm_load_print_meta: freq_scale_train = 1\n",
      "llm_load_print_meta: n_ctx_orig_yarn  = 8192\n",
      "llm_load_print_meta: rope_finetuned   = unknown\n",
      "llm_load_print_meta: ssm_d_conv       = 0\n",
      "llm_load_print_meta: ssm_d_inner      = 0\n",
      "llm_load_print_meta: ssm_d_state      = 0\n",
      "llm_load_print_meta: ssm_dt_rank      = 0\n",
      "llm_load_print_meta: ssm_dt_b_c_rms   = 0\n",
      "llm_load_print_meta: model type       = 8B\n",
      "llm_load_print_meta: model ftype      = Q4_K - Medium\n",
      "llm_load_print_meta: model params     = 8.03 B\n",
      "llm_load_print_meta: model size       = 4.58 GiB (4.89 BPW) \n",
      "llm_load_print_meta: general.name     = Llama-3-8B-optimal-merged-stage2\n",
      "llm_load_print_meta: BOS token        = 128000 '<|begin_of_text|>'\n",
      "llm_load_print_meta: EOS token        = 128009 '<|eot_id|>'\n",
      "llm_load_print_meta: EOT token        = 128009 '<|eot_id|>'\n",
      "llm_load_print_meta: LF token         = 128 'Ä'\n",
      "llm_load_print_meta: EOG token        = 128009 '<|eot_id|>'\n",
      "llm_load_print_meta: max token length = 256\n",
      "llm_load_tensors: tensor 'token_embd.weight' (q4_K) (and 0 others) cannot be used with preferred buffer type CPU_AARCH64, using CPU instead\n",
      "ggml_backend_metal_log_allocated_size: allocated buffer, size =  4096.00 MiB, ( 9584.69 /  5461.34)\n",
      "ggml_backend_metal_log_allocated_size: warning: current allocated size is greater than the recommended max working set size\n",
      "\n",
      "ggml_backend_metal_log_allocated_size: allocated buffer, size =  1000.31 MiB, (10585.00 /  5461.34)\n",
      "ggml_backend_metal_log_allocated_size: warning: current allocated size is greater than the recommended max working set size\n",
      "llm_load_tensors: offloading 32 repeating layers to GPU\n",
      "llm_load_tensors: offloading output layer to GPU\n",
      "llm_load_tensors: offloaded 33/33 layers to GPU\n",
      "llm_load_tensors: Metal_Mapped model buffer size =  4685.31 MiB\n",
      "llm_load_tensors:   CPU_Mapped model buffer size =   281.81 MiB\n",
      ".......................................................................................\n",
      "llama_new_context_with_model: n_seq_max     = 1\n",
      "llama_new_context_with_model: n_ctx         = 512\n",
      "llama_new_context_with_model: n_ctx_per_seq = 512\n",
      "llama_new_context_with_model: n_batch       = 128\n",
      "llama_new_context_with_model: n_ubatch      = 128\n",
      "llama_new_context_with_model: flash_attn    = 0\n",
      "llama_new_context_with_model: freq_base     = 500000.0\n",
      "llama_new_context_with_model: freq_scale    = 1\n",
      "llama_new_context_with_model: n_ctx_per_seq (512) < n_ctx_train (8192) -- the full capacity of the model will not be utilized\n",
      "ggml_metal_init: allocating\n",
      "ggml_metal_init: found device: Apple M1\n",
      "ggml_metal_init: picking default device: Apple M1\n",
      "ggml_metal_init: using embedded metal library\n",
      "ggml_metal_init: GPU name:   Apple M1\n",
      "ggml_metal_init: GPU family: MTLGPUFamilyApple7  (1007)\n",
      "ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003)\n",
      "ggml_metal_init: GPU family: MTLGPUFamilyMetal3  (5001)\n",
      "ggml_metal_init: simdgroup reduction   = true\n",
      "ggml_metal_init: simdgroup matrix mul. = true\n",
      "ggml_metal_init: has bfloat            = true\n",
      "ggml_metal_init: use bfloat            = false\n",
      "ggml_metal_init: hasUnifiedMemory      = true\n",
      "ggml_metal_init: recommendedMaxWorkingSetSize  =  5726.63 MB\n",
      "ggml_metal_init: loaded kernel_add                                    0x10487d590 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_add_row                                0x104f3a120 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sub                                    0x1051058f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sub_row                                0x104f9fcc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul                                    0x104f9ff20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_row                                0x104fa0500 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_div                                    0x10487d7f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_div_row                                0x10487ddd0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_f32                             0x10487e030 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_f16                             0x104fa0760 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_i32                             0x10487e290 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_i16                             0x10487e570 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_scale                                  0x10487ef10 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_scale_4                                0x10487f590 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_clamp                                  0x104fa0d30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_tanh                                   0x104fa1210 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_relu                                   0x104fa16f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sigmoid                                0x1027cf550 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu                                   0x1027cfb00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu_4                                 0x10511da30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu_quick                             0x104fa1bd0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu_quick_4                           0x10511df10 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_silu                                   0x10511e3f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_silu_4                                 0x1027d05f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f16                           0x104fa1e30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f16_4                         0x104fa2090 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f32                           0x10487fde0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f32_4                         0x10511e8d0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_diag_mask_inf                          0x10511f1a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_diag_mask_inf_8                        0x104fa22f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_f32                           0x104fa2550 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_f16                           0x104880720 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)\n",
      "ggml_metal_init: loaded kernel_get_rows_q4_0                          0x105104080 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q4_1                          0x104880cb0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q5_0                          0x1027d12a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q5_1                          0x1027d1b90 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q8_0                          0x1048815d0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q2_K                          0x1027d24e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q3_K                          0x1027d2e00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q4_K                          0x104881f30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q5_K                          0x1048825d0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q6_K                          0x104fa27b0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq2_xxs                       0x1027d34e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq2_xs                        0x1048829e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq3_xxs                       0x105120270 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq3_s                         0x1051204d0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq2_s                         0x104fa2a10 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq1_s                         0x1027d3740 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq1_m                         0x104fa2c70 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq4_nl                        0x104fa2ed0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq4_xs                        0x105120d20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_i32                           0x104fa3130 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rms_norm                               0x104883500 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_group_norm                             0x104fa3390 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_norm                                   0x104fa35f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_ssm_conv_f32                           0x1051209e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_ssm_scan_f32                           0x104884090 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f32_f32                         0x1048842f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f32                         0x1048847b0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f32_1row                    0x104885180 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f32_l4                      0x104fa3850 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f16                         0x104885ac0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q4_0_f32                        0x104886480 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q4_1_f32                        0x1027d3e70 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q5_0_f32                        0x104fa3b30 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q5_1_f32                        0x104886f70 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q8_0_f32                        0x104fa3d90 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q2_K_f32                        0x1051223c0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q3_K_f32                        0x1051227c0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q4_K_f32                        0x105123850 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q5_K_f32                        0x1027d40d0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q6_K_f32                        0x1027d4330 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq2_xxs_f32                     0x104fa45c0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq2_xs_f32                      0x104887b70 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq3_xxs_f32                     0x104888900 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq3_s_f32                       0x104fa4820 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq2_s_f32                       0x1027d4870 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq1_s_f32                       0x104889360 | th_max =  448 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq1_m_f32                       0x1027d5710 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq4_nl_f32                      0x104fa4dc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq4_xs_f32                      0x104889770 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_f32_f32                      0x10488a040 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_f16_f32                      0x10484e1a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q4_0_f32                     0x1027d5970 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q4_1_f32                     0x1027d60c0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q5_0_f32                     0x104889c40 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q5_1_f32                     0x1027d68f0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q8_0_f32                     0x10487ced0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q2_K_f32                     0x10487d2b0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q3_K_f32                     0x10488b2f0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q4_K_f32                     0x10488bce0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q5_K_f32                     0x10488bf80 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q6_K_f32                     0x1027d70c0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq2_xxs_f32                  0x1027d7890 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq2_xs_f32                   0x10488d050 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq3_xxs_f32                  0x10488da40 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq3_s_f32                    0x10488dce0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq2_s_f32                    0x10488e560 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq1_s_f32                    0x10488f760 | th_max =  448 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq1_m_f32                    0x10488fe30 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq4_nl_f32                   0x1051094b0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq4_xs_f32                   0x104fa5e70 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_f32_f32                         0x104891150 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_f16_f32                         0x104fa63b0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32                        0x1051197d0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32                        0x104fa6eb0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q5_0_f32                        0x105124b10 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q5_1_f32                        0x104fa77d0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q8_0_f32                        0x1051258c0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32                        0x104fa8150 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32                        0x105125e20 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32                        0x104891680 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32                        0x1048918e0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32                        0x104fa8aa0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq2_xxs_f32                     0x104892440 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq2_xs_f32                      0x104892d10 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq3_xxs_f32                     0x104fa93f0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq3_s_f32                       0x104faa460 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq2_s_f32                       0x104fa6b70 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq1_s_f32                       0x104893ea0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq1_m_f32                       0x104894ad0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq4_nl_f32                      0x104895370 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq4_xs_f32                      0x104faaea0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_f32_f32                      0x1027d7f80 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_f16_f32                      0x104895940 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32                (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q4_0_f32                     0x1027d8990 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q4_1_f32                     0x104fab100 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q5_0_f32                     0x104896580 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q5_1_f32                     0x104896d30 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q8_0_f32                     0x104fac3f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q2_K_f32                     0x104897390 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q3_K_f32                     0x105126820 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q4_K_f32                     0x104897f40 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q5_K_f32                     0x104fad540 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q6_K_f32                     0x105127c70 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq2_xxs_f32                  0x105127ed0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq2_xs_f32                   0x104fae170 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq3_xxs_f32                  0x1048988a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq3_s_f32                    0x104899420 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq2_s_f32                    0x105128790 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq1_s_f32                    0x104899680 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq1_m_f32                    0x104fae3d0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq4_nl_f32                   0x104faecb0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq4_xs_f32                   0x104fafaf0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_norm_f32                          0x10489a0f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_norm_f16                          0x104fb0c20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_neox_f32                          0x1027d9570 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_neox_f16                          0x104faf820 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_f16                             0x104fb03a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_f32                             0x104fb1800 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_ext_f16                         0x104fb1f40 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_ext_f32                         0x104fb28f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_upscale_f32                            0x104fb2b50 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_pad_f32                                0x104fb3c00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_timestep_embedding_f32                 0x10489c290 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_arange_f32                             0x10489b320 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_argsort_f32_i32_asc                    0x1027da260 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_argsort_f32_i32_desc                   0x10489d220 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_leaky_relu_f32                         0x10489e810 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h64                 0x104fb52e0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h80                 0x105128f30 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h96                 0x10489eec0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h112                0x10489f120 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h128                0x105129190 | th_max =  512 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h256                0x104fb4ab0 | th_max =  512 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64           (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h80           (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h96           (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h112          (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h128          (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h256          (not supported)\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h64                0x1027daa80 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h80                0x10489f900 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h96                0x1027db170 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h112               0x104fb6600 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h128               0x1027db840 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h256               0x10512a0f0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h64                0x1048a0630 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h80                0x1048a1040 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h96                0x10512aa50 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h112               0x10512b1e0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h128               0x10512bb40 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h256               0x10512c2a0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h64                0x10512cc30 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h80                0x1048a1db0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h96                0x10512d6f0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h112               0x104fb7020 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h128               0x10512dd90 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h256               0x10512dff0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h64                0x104fb5c10 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h80                0x1048a28b0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h96                0x104fb7830 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h112               0x10512e890 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h128               0x1048a3e00 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h256               0x133605d80 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h64                0x10512f640 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h80                0x1048a4590 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h96                0x105130060 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h112               0x1027dbb00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h128               0x1051307c0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h256               0x105130e40 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_f16_h128            0x105131500 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_vec_bf16_h128      (not supported)\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_0_h128           0x105131ba0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_1_h128           0x1048a4fb0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_0_h128           0x1048a5bf0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_1_h128           0x104f95d60 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q8_0_h128           0x104f37380 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_f16_h256            0x1022d4890 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_vec_bf16_h256      (not supported)\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_0_h256           0x1048a6210 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_1_h256           0x1027dc250 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_0_h256           0x104fb9080 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_1_h256           0x104fbafb0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q8_0_h256           0x104fbb9d0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_f32                            0x104fba720 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_f16                            0x104fbcd90 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_cpy_f32_bf16                      (not supported)\n",
      "ggml_metal_init: loaded kernel_cpy_f16_f32                            0x1048a7470 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f16_f16                            0x1051324e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_cpy_bf16_f32                      (not supported)\n",
      "ggml_metal_init: skipping kernel_cpy_bf16_bf16                     (not supported)\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q8_0                           0x1048a7e60 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q4_0                           0x1027dd5a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q4_1                           0x1048a80c0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q5_0                           0x1048a8cb0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q5_1                           0x105132740 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_iq4_nl                         0x1048a9740 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_concat                                 0x105133480 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sqr                                    0x1048aadb0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sqrt                                   0x1048ab930 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sin                                    0x1051346e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cos                                    0x104fbe4f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sum_rows                               0x105135010 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_pool_2d_avg_f32                        0x1027dd800 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_pool_2d_max_f32                        0x1048ab4d0 | th_max = 1024 | th_width =   32\n",
      "llama_kv_cache_init:      Metal KV buffer size =    64.00 MiB\n",
      "llama_new_context_with_model: KV self size  =   64.00 MiB, K (f16):   32.00 MiB, V (f16):   32.00 MiB\n",
      "llama_new_context_with_model:        CPU  output buffer size =     0.49 MiB\n",
      "llama_new_context_with_model:      Metal compute buffer size =    64.62 MiB\n",
      "llama_new_context_with_model:        CPU compute buffer size =     2.25 MiB\n",
      "llama_new_context_with_model: graph nodes  = 1030\n",
      "llama_new_context_with_model: graph splits = 2\n",
      "AVX = 0 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | AMX_INT8 = 0 | FMA = 0 | NEON = 1 | SVE = 0 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | RISCV_VECT = 0 | WASM_SIMD = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | \n",
      "Model metadata: {'general.quantization_version': '2', 'tokenizer.chat_template': \"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}{% endif %}\", 'tokenizer.ggml.eos_token_id': '128009', 'tokenizer.ggml.bos_token_id': '128000', 'tokenizer.ggml.pre': 'llama-bpe', 'tokenizer.ggml.model': 'gpt2', 'llama.vocab_size': '128256', 'llama.attention.head_count_kv': '8', 'llama.context_length': '8192', 'llama.attention.head_count': '32', 'general.file_type': '15', 'llama.feed_forward_length': '14336', 'llama.rope.dimension_count': '128', 'llama.rope.freq_base': '500000.000000', 'llama.embedding_length': '4096', 'general.architecture': 'llama', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'general.name': 'Llama-3-8B-optimal-merged-stage2', 'llama.block_count': '32'}\n",
      "Available chat formats from metadata: chat_template.default\n"
     ]
    }
   ],
   "source": [
    "from llama_cpp import Llama\n",
    "\n",
    "llm = Llama(\n",
    "    model_path=\"../models/Llama-3-ELYZA-JP-8B-q4_k_m.gguf\",\n",
    "    chat_format=\"llama-3\",\n",
    "    # n_ctx=1024,\n",
    "    n_batch=128,\n",
    "    n_gpu_layers=-1,\n",
    ")"
   ]
  },
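  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Since `n_ctx` is commented out above, the context window falls back to the library default. A quick sanity check, as a minimal sketch: `n_ctx()` and the `metadata` dict are part of the `llama-cpp-python` `Llama` API, and the values printed here match what the introspection dump further below reports."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare the runtime context window (n_ctx) with the model's trained\n",
    "# context length from the GGUF metadata. With n_ctx omitted, the\n",
    "# library default window applies (512 in this run).\n",
    "print(\"runtime n_ctx:\", llm.n_ctx())\n",
    "print(\"trained context:\", llm.metadata[\"llama.context_length\"])"
   ]
  },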
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Llama.generate: 58 prefix-match hit, remaining 271 prompt tokens to eval\n",
      "llama_perf_context_print:        load time =    7094.75 ms\n",
      "llama_perf_context_print: prompt eval time =       0.00 ms /   271 tokens (    0.00 ms per token,      inf tokens per second)\n",
      "llama_perf_context_print:        eval time =       0.00 ms /   165 runs   (    0.00 ms per token,      inf tokens per second)\n",
      "llama_perf_context_print:       total time =   25745.42 ms /   436 tokens\n"
     ]
    }
   ],
   "source": [
    "response = llm.create_chat_completion(\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"system\",\n",
    "            \"content\": \"あなたは誠実で優秀な日本人のアシスタントです。特に指示が無い場合は、常に日本語で回答してください。\",\n",
    "        },\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": \"以下の文章を要約してください。\\\n",
    "            # 文章\\\n",
    "            クリアするまで脱出不可能、ゲームオーバーは本当の“死”を意味する──。謎の次世代MMO『ソードアート・オンライン(SAO)』の“真実”を知らずログインした約一万人のユーザーと共に、その過酷なデスバトルは幕を開けた。\\\n",
    "            SAOに参加した一人である主人公・キリトは、いち早くこのMMOの“真実”を受け入れる。そして、ゲームの舞台となる巨大浮遊城『アインクラッド』で、パーティを組まないソロプレイヤーとして頭角をあらわしていった。\\\n",
    "            クリア条件である最上階層到達を目指し、熾烈な冒険(クエスト)を単独で続けるキリトだったが、レイピアの名手・女流剣士アスナの強引な誘いによって彼女とコンビを組むことになってしまう。その出会いは、キリトに運命とも呼べる契機をもたらし……。果たして、キリトはこのゲームから抜け出すことができるのか。\",\n",
    "        },\n",
    "    ],\n",
    "    max_tokens=1024,\n",
    ")"
   ]
  },
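  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The same call can also stream tokens as they are generated. A minimal sketch, assuming the `llm` instance above: with `stream=True`, `create_chat_completion` yields OpenAI-style chunks instead of a single response dict."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Streaming sketch: each chunk carries a \"delta\" that may or may not\n",
    "# contain a \"content\" piece (the first chunk usually holds the role).\n",
    "stream = llm.create_chat_completion(\n",
    "    messages=[{\"role\": \"user\", \"content\": \"こんにちは\"}],\n",
    "    max_tokens=64,\n",
    "    stream=True,\n",
    ")\n",
    "for chunk in stream:\n",
    "    delta = chunk[\"choices\"][0][\"delta\"]\n",
    "    if \"content\" in delta:\n",
    "        print(delta[\"content\"], end=\"\", flush=True)"
   ]
  },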
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "以下は文章の要約です。\n",
      "\n",
      "謎のMMO『ソードアート・オンライン(SAO)』に約1万人のユーザーがログインしたが、ゲームの真実を知らずに脱出不可能なデスバトルが始まった。主人公のキリトは早くに真実を受け入れ、巨大浮遊城『アインクラッド』でソロプレイヤーとして活躍する。クリア条件の最上階層到達を目指すキリトは、女流剣士アスナとコンビを組むことになり、運命の出会いを果たす。果たしてキリトはこのゲームから抜け出すことができるのか。\n"
     ]
    }
   ],
   "source": [
    "print(response[\"choices\"][0][\"message\"][\"content\"])"
   ]
  },
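  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Beyond the message content, the response follows the OpenAI chat-completion schema, so token accounting and the stop reason are available directly (a short sketch over the `response` object above):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# \"usage\" reports prompt/completion/total token counts; \"finish_reason\"\n",
    "# distinguishes a natural stop from hitting the max_tokens limit.\n",
    "print(response[\"usage\"])\n",
    "print(response[\"choices\"][0][\"finish_reason\"])"
   ]
  },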
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'verbose': True,\n",
       " '_stack': <contextlib.ExitStack at 0x105b60ad0>,\n",
       " 'numa': 0,\n",
       " 'model_path': '../models/Llama-3-ELYZA-JP-8B-q4_k_m.gguf',\n",
       " 'model_params': <llama_cpp.llama_cpp.llama_model_params at 0x115e5ab50>,\n",
       " '_rpc_servers': None,\n",
       " 'tensor_split': None,\n",
       " '_c_tensor_split': None,\n",
       " 'kv_overrides': None,\n",
       " 'n_batch': 128,\n",
       " 'n_threads': 4,\n",
       " 'n_threads_batch': 8,\n",
       " '_seed': 1894574933,\n",
       " 'context_params': <llama_cpp.llama_cpp.llama_context_params at 0x115e59dd0>,\n",
       " 'last_n_tokens_size': 64,\n",
       " 'cache': None,\n",
       " 'lora_base': None,\n",
       " 'lora_scale': 1.0,\n",
       " 'lora_path': None,\n",
       " 'spm_infill': False,\n",
       " '_model': <llama_cpp._internals.LlamaModel at 0x105ad6690>,\n",
       " 'tokenizer_': <llama_cpp.llama_tokenizer.LlamaTokenizer at 0x115e0e450>,\n",
       " '_ctx': <llama_cpp._internals.LlamaContext at 0x105b5d1d0>,\n",
       " '_batch': <llama_cpp._internals.LlamaBatch at 0x10475e9d0>,\n",
       " '_lora_adapter': None,\n",
       " 'chat_format': 'llama-3',\n",
       " 'chat_handler': None,\n",
       " '_chat_handlers': {'chat_template.default': <function llama_cpp.llama_chat_format.chat_formatter_to_chat_completion_handler.<locals>.chat_completion_handler(*, llama: 'llama.Llama', messages: 'List[llama_types.ChatCompletionRequestMessage]', functions: 'Optional[List[llama_types.ChatCompletionFunction]]' = None, function_call: 'Optional[llama_types.ChatCompletionRequestFunctionCall]' = None, tools: 'Optional[List[llama_types.ChatCompletionTool]]' = None, tool_choice: 'Optional[llama_types.ChatCompletionToolChoiceOption]' = None, temperature: 'float' = 0.2, top_p: 'float' = 0.95, top_k: 'int' = 40, min_p: 'float' = 0.05, typical_p: 'float' = 1.0, stream: 'bool' = False, stop: 'Optional[Union[str, List[str]]]' = [], seed: 'Optional[int]' = None, response_format: 'Optional[llama_types.ChatCompletionRequestResponseFormat]' = None, max_tokens: 'Optional[int]' = None, presence_penalty: 'float' = 0.0, frequency_penalty: 'float' = 0.0, repeat_penalty: 'float' = 1.1, tfs_z: 'float' = 1.0, mirostat_mode: 'int' = 0, mirostat_tau: 'float' = 5.0, mirostat_eta: 'float' = 0.1, model: 'Optional[str]' = None, logits_processor: 'Optional[llama.LogitsProcessorList]' = None, grammar: 'Optional[llama.LlamaGrammar]' = None, logit_bias: 'Optional[Dict[str, float]]' = None, logprobs: 'Optional[bool]' = None, top_logprobs: 'Optional[int]' = None, **kwargs) -> 'Union[llama_types.CreateChatCompletionResponse, Iterator[llama_types.CreateChatCompletionStreamResponse]]'>},\n",
       " 'draft_model': None,\n",
       " '_n_vocab': 128256,\n",
       " '_n_ctx': 512,\n",
       " '_token_nl': 128,\n",
       " '_token_eos': 128009,\n",
       " '_candidates': <llama_cpp._internals.LlamaTokenDataArray at 0x115ec1450>,\n",
       " 'n_tokens': 494,\n",
       " 'input_ids': array([128000, 128006,   9125, 128007,    271,  30591, 112568,  15682,\n",
       "        124097, 103350,  16556, 104622, 106241,  26854, 102433, 107707,\n",
       "         39880,  57207, 105335,  52414,  38641,   1811,  66378,  20230,\n",
       "         64467,  20379,  29295,  43568,  16995, 126513,   5486,  40053,\n",
       "         20230, 102433, 102158,  16556, 113925,  39926,  72315,   1811,\n",
       "        128009, 128006,    882, 128007,    271,  88852,  16144,  83125,\n",
       "         30512,  31634, 103664,  39926,  72315,   1811,    310,    674,\n",
       "        112053,    310, 116381, 104612,  54926, 103296, 110645,  20834,\n",
       "         16937,  88367,   5486, 114567,  90962, 112164,  11972,  15682,\n",
       "        117475,  16144,   2118, 102625,    863,  30512, 115552,  54926,\n",
       "         17424,   1811, 105037,    236,  16144,  33671, 101083,  31640,\n",
       "          8195,     46,  44620, 102741,  65575,  39880,  84477,   9458,\n",
       "        110191, 118372,  10110,   7934,     46,   7705,  36761,  16144,\n",
       "          2118,  89151, 103350,    863,  30512,  53283, 121140,  77750,\n",
       "         76171,  56051, 103664,  15120,  32307, 107707, 108152,  38248,\n",
       "           114,  11972,  19732,  55999,  20230, 106116, 103188, 100845,\n",
       "           115,  26854,  68408,  22398,  66953, 127764,  15682, 106633,\n",
       "        125523, 107674,   1811,    310,  16998,     46,  20230, 110284,\n",
       "         56051, 122485, 103195, 122768,  35417,   9458,  62903,  37823,\n",
       "         20251,  15682, 116898,  43514, 103856,  47884,  51330,   8195,\n",
       "            46,  16144,   2118,  89151, 103350,    863, 114475,  76622,\n",
       "         17701, 104028, 124845,   5486, 114567,  16144, 107875,  55038,\n",
       "        117282, 109098,  27384, 111179, 109739,  60174,  44620,  39880,\n",
       "         76171, 107059, 105404,  36761,  16556,   5486,  80805, 117675,\n",
       "         30512, 103214,  17129, 100604, 102741,  42634,  57326, 108748,\n",
       "        104930,  11972, 103306, 103892,  64936,  30512,  30591,  33503,\n",
       "         78183, 109768, 100472,   1811,    310, 116381, 104612,  77195,\n",
       "        103195,  32335,  17905, 106090, 114050,  28037, 104067,  30512,\n",
       "         30832,  64467,  15024,   5486, 102448,    122, 111101,  26854,\n",
       "        112798, 117126,  10110,  29220,  76739,  71634,   7705,  30512,\n",
       "        110904, 106063,  16556, 106307, 105784,  62903,  37823,  20251,\n",
       "        103351,  29295,   5486, 108748,  70563,  39880,  16144,  13372,\n",
       "         46034,   9458,  58850,  89753, 119063, 101559,  39880,  22398,\n",
       "         96452,  16144, 104195,  73686,  26854,  45918,    246,  16995,\n",
       "        113468, 109453,  19732, 109713,  91482,  30512, 103214, 104004,\n",
       "        100909, 117084, 114732, 109807,  20834,  38093,  16995,  15682,\n",
       "          5486,  62903,  37823,  20251,  20230, 103768,  51609, 107173,\n",
       "        105324, 103854,  30369, 120273, 101513,  30512,  32977,  28713,\n",
       "        124949, 127891,  28873,  28713,  39926,   5486,  62903,  37823,\n",
       "         20251,  15682,  51330, 114567,  55031, 113487,  76622, 121406,\n",
       "        105908, 108608, 104865,   1811, 128009, 128006,  78191, 128007,\n",
       "           271,  88852,  15682,  83125,  16144,  31634, 103664,  38641,\n",
       "          3490, 105037,    236,  16144,   8195,     46,  44620, 102741,\n",
       "         65575,  39880,  84477,   9458, 110191, 118372,  10110,   7934,\n",
       "            46,   7705,  36761,  20230, 103664,     16,  32307, 107707,\n",
       "        108152,  38248,    114,  11972,  29295,  77750,  76171,  56051,\n",
       "         29295,   5486, 114567,  16144,  89151, 103350,  30512,  53283,\n",
       "        121140,  20230, 110645,  20834,  16937,  88367,  26854,  68408,\n",
       "         22398,  66953, 127764,  29295,  27704, 117864,   1811, 122768,\n",
       "         35417,  16144,  62903,  37823,  20251,  15682, 103856,  47884,\n",
       "         20230,  89151, 103350, 114475,  76622, 125639,   5486, 109098,\n",
       "         27384, 111179, 109739,  60174,  44620,  39880,  76171, 107059,\n",
       "        105404,  36761,  16556, 102741,  42634,  57326, 108748, 104930,\n",
       "         11972, 103306,  76706, 109526,    235,  54926,   1811,  29220,\n",
       "        104612,  77195,  16144,  32335,  17905, 106090, 114050,  28037,\n",
       "        104067,  30512,  30832,  64467,  17663,  62903,  37823,  20251,\n",
       "         15682,   5486,  58850,  89753, 119063, 101559,  39880,  22398,\n",
       "         96452,  19732, 109713,  91482,  30512, 103214, 104004, 100909,\n",
       "        115717,   5486, 103768,  51609,  16144,  20834,  38093,  16995,\n",
       "         30512,  28873,  28713,  17663,   1811,  28873,  28713,  39926,\n",
       "         62903,  37823,  20251,  15682,  51330, 114567,  55031, 113487,\n",
       "         76622, 121406, 105908, 108608, 104865,   1811, 127173, 104028,\n",
       "        108044, 108323,  19732,  97518,  89046,  47000, 107441, 108086,\n",
       "          5486, 105469, 103424, 102212,  23530,  15682,  17039,      1],\n",
       "       dtype=int32),\n",
       " 'scores': array([[0., 0., 0., ..., 0., 0., 0.],\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        ...,\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        [0., 0., 0., ..., 0., 0., 0.],\n",
       "        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),\n",
       " '_mirostat_mu': c_float(10.0),\n",
       " 'metadata': {'general.quantization_version': '2',\n",
       "  'tokenizer.chat_template': \"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}{% endif %}\",\n",
       "  'tokenizer.ggml.eos_token_id': '128009',\n",
       "  'tokenizer.ggml.bos_token_id': '128000',\n",
       "  'tokenizer.ggml.pre': 'llama-bpe',\n",
       "  'tokenizer.ggml.model': 'gpt2',\n",
       "  'llama.vocab_size': '128256',\n",
       "  'llama.attention.head_count_kv': '8',\n",
       "  'llama.context_length': '8192',\n",
       "  'llama.attention.head_count': '32',\n",
       "  'general.file_type': '15',\n",
       "  'llama.feed_forward_length': '14336',\n",
       "  'llama.rope.dimension_count': '128',\n",
       "  'llama.rope.freq_base': '500000.000000',\n",
       "  'llama.embedding_length': '4096',\n",
       "  'general.architecture': 'llama',\n",
       "  'llama.attention.layer_norm_rms_epsilon': '0.000010',\n",
       "  'general.name': 'Llama-3-8B-optimal-merged-stage2',\n",
       "  'llama.block_count': '32'},\n",
       " '_sampler': <llama_cpp._internals.LlamaSampler at 0x1031ee450>}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vars(llm)"
   ]
  },
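  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The dump above shows `n_tokens: 494` against `_n_ctx: 512`, so this exchange nearly filled the default context window. A hedged way to check prompt length up front (`tokenize` expects UTF-8 `bytes`):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count tokens before sending a prompt so that prompt + max_tokens\n",
    "# stays inside the context window (sketch; tokenize takes bytes).\n",
    "prompt = \"以下の文章を要約してください。\"\n",
    "n_prompt = len(llm.tokenize(prompt.encode(\"utf-8\")))\n",
    "print(n_prompt, \"prompt tokens /\", llm.n_ctx(), \"context window\")"
   ]
  },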
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}