binwang committed on
Commit
7d66eb7
·
1 Parent(s): b383046
Files changed (1) hide show
  1. app.py +0 -45
app.py CHANGED
@@ -9,10 +9,8 @@ from statistics import median
9
 
10
  print("Loading datasets...")
11
 
12
-
13
  # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
14
 
15
-
16
  def add_rank(df, compute_average=True):
17
  cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (Params)", "Embedding Dimensions", "Sequence Length"]]
18
  if len(cols_to_rank) == 1:
@@ -78,7 +76,6 @@ def get_data_cross_mmlu_overall(eval_mode='zero_shot', fillna=True, rank=True):
78
  AC3_3 = median(AC3_3)
79
 
80
  except:
81
- print(results_list)
82
  consistency_score_3 = -1
83
  overall_acc = -1
84
  AC3_3 = -1
@@ -146,7 +143,6 @@ def get_data_cross_mmlu_language(eval_mode='zero_shot', fillna=True, rank=True):
146
 
147
 
148
  except:
149
- print(results_list)
150
  English = -1
151
  Vietnamese = -1
152
  Chinese = -1
@@ -219,7 +215,6 @@ def get_data_cross_logiqa_overall(eval_mode='zero_shot', fillna=True, rank=True)
219
  AC3_3 = median(AC3_3)
220
 
221
  except:
222
- print(results_list)
223
  consistency_score_3 = -1
224
  overall_acc = -1
225
  AC3_3 = -1
@@ -287,7 +282,6 @@ def get_data_cross_logiqa_language(eval_mode='zero_shot', fillna=True, rank=True
287
 
288
 
289
  except:
290
- print(results_list)
291
  English = -1
292
  Vietnamese = -1
293
  Chinese = -1
@@ -351,7 +345,6 @@ def get_data_sg_eval(eval_mode='zero_shot', fillna=True, rank=True):
351
  accuracy = median([results['accuracy'] for results in results_list])
352
 
353
  except:
354
- print(results_list)
355
  accuracy = -1
356
 
357
  res = {
@@ -404,7 +397,6 @@ def get_data_us_eval(eval_mode='zero_shot', fillna=True, rank=True):
404
  accuracy = median([results['accuracy'] for results in results_list])
405
 
406
  except:
407
- print(results_list)
408
  accuracy = -1
409
 
410
 
@@ -458,7 +450,6 @@ def get_data_cn_eval(eval_mode='zero_shot', fillna=True, rank=True):
458
  accuracy = median([results['accuracy'] for results in results_list])
459
 
460
  except:
461
- print(results_list)
462
  accuracy = -1
463
 
464
 
@@ -512,7 +503,6 @@ def get_data_ph_eval(eval_mode='zero_shot', fillna=True, rank=True):
512
  accuracy = median([results['accuracy'] for results in results_list])
513
 
514
  except:
515
- print(results_list)
516
  accuracy = -1
517
 
518
 
@@ -566,7 +556,6 @@ def get_data_sing2eng(eval_mode='zero_shot', fillna=True, rank=True):
566
  bleu_score = median([results['bleu_score'] for results in results_list])
567
 
568
  except:
569
- print(results_list)
570
  bleu_score = -1
571
 
572
 
@@ -619,7 +608,6 @@ def get_data_flores_ind2eng(eval_mode='zero_shot', fillna=True, rank=True):
619
  bleu_score = median([results['bleu_score'] for results in results_list])
620
 
621
  except:
622
- print(results_list)
623
  bleu_score = -1
624
 
625
 
@@ -674,7 +662,6 @@ def get_data_flores_vie2eng(eval_mode='zero_shot', fillna=True, rank=True):
674
  bleu_score = median([results['bleu_score'] for results in results_list])
675
 
676
  except:
677
- print(results_list)
678
  bleu_score = -1
679
 
680
 
@@ -727,7 +714,6 @@ def get_data_flores_zho2eng(eval_mode='zero_shot', fillna=True, rank=True):
727
  bleu_score = median([results['bleu_score'] for results in results_list])
728
 
729
  except:
730
- print(results_list)
731
  bleu_score = -1
732
 
733
 
@@ -781,7 +767,6 @@ def get_data_flores_zsm2eng(eval_mode='zero_shot', fillna=True, rank=True):
781
  bleu_score = median([results['bleu_score'] for results in results_list])
782
 
783
  except:
784
- print(results_list)
785
  bleu_score = -1
786
 
787
 
@@ -835,7 +820,6 @@ def get_data_mmlu(eval_mode='zero_shot', fillna=True, rank=True):
835
  accuracy = median([results['accuracy'] for results in results_list])
836
 
837
  except:
838
- print(results_list)
839
  accuracy = -1
840
 
841
 
@@ -890,7 +874,6 @@ def get_data_mmlu_full(eval_mode='zero_shot', fillna=True, rank=True):
890
  accuracy = median([results['accuracy'] for results in results_list])
891
 
892
  except:
893
- print(results_list)
894
  accuracy = -1
895
 
896
 
@@ -944,7 +927,6 @@ def get_data_c_eval(eval_mode='zero_shot', fillna=True, rank=True):
944
  accuracy = median([results['accuracy'] for results in results_list])
945
 
946
  except:
947
- print(results_list)
948
  accuracy = -1
949
 
950
 
@@ -998,7 +980,6 @@ def get_data_c_eval_full(eval_mode='zero_shot', fillna=True, rank=True):
998
  accuracy = median([results['accuracy'] for results in results_list])
999
 
1000
  except:
1001
- print(results_list)
1002
  accuracy = -1
1003
 
1004
 
@@ -1054,7 +1035,6 @@ def get_data_cmmlu(eval_mode='zero_shot', fillna=True, rank=True):
1054
  accuracy = median([results['accuracy'] for results in results_list])
1055
 
1056
  except:
1057
- print(results_list)
1058
  accuracy = -1
1059
 
1060
 
@@ -1112,7 +1092,6 @@ def get_data_cmmlu_full(eval_mode='zero_shot', fillna=True, rank=True):
1112
  accuracy = median([results['accuracy'] for results in results_list])
1113
 
1114
  except:
1115
- print(results_list)
1116
  accuracy = -1
1117
 
1118
 
@@ -1167,7 +1146,6 @@ def get_data_zbench(eval_mode='zero_shot', fillna=True, rank=True):
1167
  accuracy = median([results['accuracy'] for results in results_list])
1168
 
1169
  except:
1170
- print(results_list)
1171
  accuracy = -1
1172
 
1173
 
@@ -1222,7 +1200,6 @@ def get_data_ind_emotion(eval_mode='zero_shot', fillna=True, rank=True):
1222
  accuracy = median([results['accuracy'] for results in results_list])
1223
 
1224
  except:
1225
- print(results_list)
1226
  accuracy = -1
1227
 
1228
 
@@ -1278,7 +1255,6 @@ def get_data_ocnli(eval_mode='zero_shot', fillna=True, rank=True):
1278
  accuracy = median([results['accuracy'] for results in results_list])
1279
 
1280
  except:
1281
- print(results_list)
1282
  accuracy = -1
1283
 
1284
 
@@ -1333,7 +1309,6 @@ def get_data_c3(eval_mode='zero_shot', fillna=True, rank=True):
1333
  accuracy = median([results['accuracy'] for results in results_list])
1334
 
1335
  except:
1336
- print(results_list)
1337
  accuracy = -1
1338
 
1339
 
@@ -1388,7 +1363,6 @@ def get_data_dream(eval_mode='zero_shot', fillna=True, rank=True):
1388
  accuracy = median([results['accuracy'] for results in results_list])
1389
 
1390
  except:
1391
- print(results_list)
1392
  accuracy = -1
1393
 
1394
 
@@ -1445,7 +1419,6 @@ def get_data_samsum(eval_mode='zero_shot', fillna=True, rank=True):
1445
  rougeL = median([results['rougeL'] for results in results_list])
1446
 
1447
  except:
1448
- print(results_list)
1449
  rouge1 = -1
1450
  rouge2 = -1
1451
  rougeL = -1
@@ -1505,7 +1478,6 @@ def get_data_dialogsum(eval_mode='zero_shot', fillna=True, rank=True):
1505
  rougeL = median([results['rougeL'] for results in results_list])
1506
 
1507
  except:
1508
- print(results_list)
1509
  rouge1 = -1
1510
  rouge2 = -1
1511
  rougeL = -1
@@ -1565,7 +1537,6 @@ def get_data_sst2(eval_mode='zero_shot', fillna=True, rank=True):
1565
  accuracy = median([results['accuracy'] for results in results_list])
1566
 
1567
  except:
1568
- print(results_list)
1569
  accuracy = -1
1570
 
1571
 
@@ -1621,7 +1592,6 @@ def get_data_cola(eval_mode='zero_shot', fillna=True, rank=True):
1621
  accuracy = median([results['accuracy'] for results in results_list])
1622
 
1623
  except:
1624
- print(results_list)
1625
  accuracy = -1
1626
 
1627
 
@@ -1678,7 +1648,6 @@ def get_data_qqp(eval_mode='zero_shot', fillna=True, rank=True):
1678
  accuracy = median([results['accuracy'] for results in results_list])
1679
 
1680
  except:
1681
- print(results_list)
1682
  accuracy = -1
1683
 
1684
 
@@ -1735,7 +1704,6 @@ def get_data_mnli(eval_mode='zero_shot', fillna=True, rank=True):
1735
  accuracy = median([results['accuracy'] for results in results_list])
1736
 
1737
  except:
1738
- print(results_list)
1739
  accuracy = -1
1740
 
1741
 
@@ -1792,7 +1760,6 @@ def get_data_qnli(eval_mode='zero_shot', fillna=True, rank=True):
1792
  accuracy = median([results['accuracy'] for results in results_list])
1793
 
1794
  except:
1795
- print(results_list)
1796
  accuracy = -1
1797
 
1798
 
@@ -1849,7 +1816,6 @@ def get_data_wnli(eval_mode='zero_shot', fillna=True, rank=True):
1849
  accuracy = median([results['accuracy'] for results in results_list])
1850
 
1851
  except:
1852
- print(results_list)
1853
  accuracy = -1
1854
 
1855
 
@@ -1906,7 +1872,6 @@ def get_data_rte(eval_mode='zero_shot', fillna=True, rank=True):
1906
  accuracy = median([results['accuracy'] for results in results_list])
1907
 
1908
  except:
1909
- print(results_list)
1910
  accuracy = -1
1911
 
1912
 
@@ -1964,7 +1929,6 @@ def get_data_mrpc(eval_mode='zero_shot', fillna=True, rank=True):
1964
  accuracy = median([results['accuracy'] for results in results_list])
1965
 
1966
  except:
1967
- print(results_list)
1968
  accuracy = -1
1969
 
1970
 
@@ -3052,15 +3016,6 @@ with block:
3052
 
3053
 
3054
 
3055
-
3056
-
3057
-
3058
-
3059
-
3060
-
3061
-
3062
-
3063
-
3064
  gr.Markdown(r"""
3065
 
3066
  If this work is useful to you, please citing our work:
 
9
 
10
  print("Loading datasets...")
11
 
 
12
  # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
13
 
 
14
  def add_rank(df, compute_average=True):
15
  cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (Params)", "Embedding Dimensions", "Sequence Length"]]
16
  if len(cols_to_rank) == 1:
 
76
  AC3_3 = median(AC3_3)
77
 
78
  except:
 
79
  consistency_score_3 = -1
80
  overall_acc = -1
81
  AC3_3 = -1
 
143
 
144
 
145
  except:
 
146
  English = -1
147
  Vietnamese = -1
148
  Chinese = -1
 
215
  AC3_3 = median(AC3_3)
216
 
217
  except:
 
218
  consistency_score_3 = -1
219
  overall_acc = -1
220
  AC3_3 = -1
 
282
 
283
 
284
  except:
 
285
  English = -1
286
  Vietnamese = -1
287
  Chinese = -1
 
345
  accuracy = median([results['accuracy'] for results in results_list])
346
 
347
  except:
 
348
  accuracy = -1
349
 
350
  res = {
 
397
  accuracy = median([results['accuracy'] for results in results_list])
398
 
399
  except:
 
400
  accuracy = -1
401
 
402
 
 
450
  accuracy = median([results['accuracy'] for results in results_list])
451
 
452
  except:
 
453
  accuracy = -1
454
 
455
 
 
503
  accuracy = median([results['accuracy'] for results in results_list])
504
 
505
  except:
 
506
  accuracy = -1
507
 
508
 
 
556
  bleu_score = median([results['bleu_score'] for results in results_list])
557
 
558
  except:
 
559
  bleu_score = -1
560
 
561
 
 
608
  bleu_score = median([results['bleu_score'] for results in results_list])
609
 
610
  except:
 
611
  bleu_score = -1
612
 
613
 
 
662
  bleu_score = median([results['bleu_score'] for results in results_list])
663
 
664
  except:
 
665
  bleu_score = -1
666
 
667
 
 
714
  bleu_score = median([results['bleu_score'] for results in results_list])
715
 
716
  except:
 
717
  bleu_score = -1
718
 
719
 
 
767
  bleu_score = median([results['bleu_score'] for results in results_list])
768
 
769
  except:
 
770
  bleu_score = -1
771
 
772
 
 
820
  accuracy = median([results['accuracy'] for results in results_list])
821
 
822
  except:
 
823
  accuracy = -1
824
 
825
 
 
874
  accuracy = median([results['accuracy'] for results in results_list])
875
 
876
  except:
 
877
  accuracy = -1
878
 
879
 
 
927
  accuracy = median([results['accuracy'] for results in results_list])
928
 
929
  except:
 
930
  accuracy = -1
931
 
932
 
 
980
  accuracy = median([results['accuracy'] for results in results_list])
981
 
982
  except:
 
983
  accuracy = -1
984
 
985
 
 
1035
  accuracy = median([results['accuracy'] for results in results_list])
1036
 
1037
  except:
 
1038
  accuracy = -1
1039
 
1040
 
 
1092
  accuracy = median([results['accuracy'] for results in results_list])
1093
 
1094
  except:
 
1095
  accuracy = -1
1096
 
1097
 
 
1146
  accuracy = median([results['accuracy'] for results in results_list])
1147
 
1148
  except:
 
1149
  accuracy = -1
1150
 
1151
 
 
1200
  accuracy = median([results['accuracy'] for results in results_list])
1201
 
1202
  except:
 
1203
  accuracy = -1
1204
 
1205
 
 
1255
  accuracy = median([results['accuracy'] for results in results_list])
1256
 
1257
  except:
 
1258
  accuracy = -1
1259
 
1260
 
 
1309
  accuracy = median([results['accuracy'] for results in results_list])
1310
 
1311
  except:
 
1312
  accuracy = -1
1313
 
1314
 
 
1363
  accuracy = median([results['accuracy'] for results in results_list])
1364
 
1365
  except:
 
1366
  accuracy = -1
1367
 
1368
 
 
1419
  rougeL = median([results['rougeL'] for results in results_list])
1420
 
1421
  except:
 
1422
  rouge1 = -1
1423
  rouge2 = -1
1424
  rougeL = -1
 
1478
  rougeL = median([results['rougeL'] for results in results_list])
1479
 
1480
  except:
 
1481
  rouge1 = -1
1482
  rouge2 = -1
1483
  rougeL = -1
 
1537
  accuracy = median([results['accuracy'] for results in results_list])
1538
 
1539
  except:
 
1540
  accuracy = -1
1541
 
1542
 
 
1592
  accuracy = median([results['accuracy'] for results in results_list])
1593
 
1594
  except:
 
1595
  accuracy = -1
1596
 
1597
 
 
1648
  accuracy = median([results['accuracy'] for results in results_list])
1649
 
1650
  except:
 
1651
  accuracy = -1
1652
 
1653
 
 
1704
  accuracy = median([results['accuracy'] for results in results_list])
1705
 
1706
  except:
 
1707
  accuracy = -1
1708
 
1709
 
 
1760
  accuracy = median([results['accuracy'] for results in results_list])
1761
 
1762
  except:
 
1763
  accuracy = -1
1764
 
1765
 
 
1816
  accuracy = median([results['accuracy'] for results in results_list])
1817
 
1818
  except:
 
1819
  accuracy = -1
1820
 
1821
 
 
1872
  accuracy = median([results['accuracy'] for results in results_list])
1873
 
1874
  except:
 
1875
  accuracy = -1
1876
 
1877
 
 
1929
  accuracy = median([results['accuracy'] for results in results_list])
1930
 
1931
  except:
 
1932
  accuracy = -1
1933
 
1934
 
 
3016
 
3017
 
3018
 
 
 
 
 
 
 
 
 
 
3019
  gr.Markdown(r"""
3020
 
3021
  If this work is useful to you, please citing our work: