---
datasets:
- mindchain/wikitext2
- yahma/alpaca-cleaned
metrics:
- perplexity
- accuracy
base_model:
- TinyLlama/TinyLlama_v1.1


model-index:
- name: TinyLlama_v1.1_mix_wikitext_alpaca_2bit_BitDistiller_baseline
  results:
  - task:
      type: multiple-choice
      name: QA Benchmarking
    dataset:
      type: allenai/ai2_arc
      name: ARC-Challenge
      config: ARC-Challenge
      split: test
    metrics:
      - type: accuracy
        name: Accuracy
        value: 0.2150170648464164
      - type: accuracy
        name: Normalized Accuracy
        value: 0.24573378839590443
  - task:
      type: multiple-choice
      name: QA Benchmarking
    dataset:
      type: hellaswag
      name: HellaSwag
      split: validation
    metrics:
      - type: accuracy
        name: Accuracy
        value: 0.3240390360485959
      - type: accuracy
        name: Normalized Accuracy
        value: 0.37333200557657836
  - task:
      type: multiple-choice
      name: QA Benchmarking
    dataset:
      type: piqa
      name: PIQA
      split: validation
    metrics:
      - type: accuracy
        name: Accuracy
        value: 0.6082698585418934
      - type: accuracy
        name: Normalized Accuracy
        value: 0.6071817192600653
  - task:
      type: multiple-choice
      name: QA Benchmarking
    dataset:
      type: winogrande
      name: Winogrande
      split: validation
    metrics:
      - type: accuracy
        name: Accuracy
        value: 0.5201262825572218
  - task:
      type: multiple-choice
      name: QA Benchmarking
    dataset:
      type: aggregated
      name: QA-Avg
    metrics:
      - type: accuracy
        name: QA Average
        value: 0.4168630604985319
  - task:
      type: language-modeling
      name: Language Modeling
    dataset:
      type: wikitext
      name: WikiText-2
      split: test
    metrics:
      - type: perplexity
        name: Perplexity
        value: 22.655162811279297


---

TODO: check the splits of each dataset

Loss curves:

![image/png](https://cdn-uploads.huggingface.co/production/uploads/678feb6368616344ef035e43/HaskRCayRW-vQuEE06jZj.png)