xzuyn commited on
Commit
6330f6d
·
verified ·
1 Parent(s): 8399ed8

Upload Step 150/9427

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_state.json +549 -3
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d58e8b70cae140c27869d506ce1fd1f11fba30e54691e644d41e79743eee8cff
3
  size 1907432232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:badd47000be379b28f59dad18021a7b6074d3deaff0246e3db26f6e121fdd9c3
3
  size 1907432232
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.008485812781755503,
6
  "eval_steps": 10,
7
- "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -640,6 +640,552 @@
640
  "eval_samples_per_second": 1.888,
641
  "eval_steps_per_second": 0.472,
642
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
  }
644
  ],
645
  "logging_steps": 1,
@@ -659,7 +1205,7 @@
659
  "attributes": {}
660
  }
661
  },
662
- "total_flos": 1.2900468007108608e+17,
663
  "train_batch_size": 4,
664
  "trial_name": null,
665
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.015910898965791568,
6
  "eval_steps": 10,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
640
  "eval_samples_per_second": 1.888,
641
  "eval_steps_per_second": 0.472,
642
  "step": 80
643
+ },
644
+ {
645
+ "epoch": 0.008591885441527447,
646
+ "grad_norm": 1.0189088582992554,
647
+ "learning_rate": 4.995886830373846e-07,
648
+ "loss": 1.5961,
649
+ "step": 81
650
+ },
651
+ {
652
+ "epoch": 0.00869795810129939,
653
+ "grad_norm": 0.7427169680595398,
654
+ "learning_rate": 4.995835649831073e-07,
655
+ "loss": 1.6456,
656
+ "step": 82
657
+ },
658
+ {
659
+ "epoch": 0.008804030761071334,
660
+ "grad_norm": 0.8337876796722412,
661
+ "learning_rate": 4.995784459437785e-07,
662
+ "loss": 1.2846,
663
+ "step": 83
664
+ },
665
+ {
666
+ "epoch": 0.008910103420843278,
667
+ "grad_norm": 0.6561726331710815,
668
+ "learning_rate": 4.995733259191137e-07,
669
+ "loss": 1.5769,
670
+ "step": 84
671
+ },
672
+ {
673
+ "epoch": 0.009016176080615222,
674
+ "grad_norm": 0.7599915862083435,
675
+ "learning_rate": 4.995682049088284e-07,
676
+ "loss": 1.7303,
677
+ "step": 85
678
+ },
679
+ {
680
+ "epoch": 0.009122248740387166,
681
+ "grad_norm": 0.7153452038764954,
682
+ "learning_rate": 4.995630829126379e-07,
683
+ "loss": 1.5249,
684
+ "step": 86
685
+ },
686
+ {
687
+ "epoch": 0.009228321400159108,
688
+ "grad_norm": 0.7663532495498657,
689
+ "learning_rate": 4.995579599302577e-07,
690
+ "loss": 1.4317,
691
+ "step": 87
692
+ },
693
+ {
694
+ "epoch": 0.009334394059931053,
695
+ "grad_norm": 0.8668680191040039,
696
+ "learning_rate": 4.995528359614027e-07,
697
+ "loss": 1.8046,
698
+ "step": 88
699
+ },
700
+ {
701
+ "epoch": 0.009440466719702997,
702
+ "grad_norm": 0.684675931930542,
703
+ "learning_rate": 4.99547711005788e-07,
704
+ "loss": 1.393,
705
+ "step": 89
706
+ },
707
+ {
708
+ "epoch": 0.00954653937947494,
709
+ "grad_norm": 0.8086925745010376,
710
+ "learning_rate": 4.995425850631287e-07,
711
+ "loss": 1.5898,
712
+ "step": 90
713
+ },
714
+ {
715
+ "epoch": 0.00954653937947494,
716
+ "eval_loss": 1.4867264032363892,
717
+ "eval_runtime": 68.2886,
718
+ "eval_samples_per_second": 1.874,
719
+ "eval_steps_per_second": 0.469,
720
+ "step": 90
721
+ },
722
+ {
723
+ "epoch": 0.009652612039246885,
724
+ "grad_norm": 0.6806846261024475,
725
+ "learning_rate": 4.995374581331393e-07,
726
+ "loss": 1.5126,
727
+ "step": 91
728
+ },
729
+ {
730
+ "epoch": 0.009758684699018827,
731
+ "grad_norm": 0.7875713109970093,
732
+ "learning_rate": 4.995323302155347e-07,
733
+ "loss": 1.5119,
734
+ "step": 92
735
+ },
736
+ {
737
+ "epoch": 0.009864757358790771,
738
+ "grad_norm": 0.6719956398010254,
739
+ "learning_rate": 4.995272013100296e-07,
740
+ "loss": 1.611,
741
+ "step": 93
742
+ },
743
+ {
744
+ "epoch": 0.009970830018562716,
745
+ "grad_norm": 0.7365944385528564,
746
+ "learning_rate": 4.995220714163384e-07,
747
+ "loss": 1.2394,
748
+ "step": 94
749
+ },
750
+ {
751
+ "epoch": 0.01007690267833466,
752
+ "grad_norm": 0.6620836853981018,
753
+ "learning_rate": 4.995169405341754e-07,
754
+ "loss": 1.6405,
755
+ "step": 95
756
+ },
757
+ {
758
+ "epoch": 0.010182975338106604,
759
+ "grad_norm": 0.7277278900146484,
760
+ "learning_rate": 4.995118086632551e-07,
761
+ "loss": 1.7809,
762
+ "step": 96
763
+ },
764
+ {
765
+ "epoch": 0.010289047997878546,
766
+ "grad_norm": 0.7061654925346375,
767
+ "learning_rate": 4.995066758032913e-07,
768
+ "loss": 1.361,
769
+ "step": 97
770
+ },
771
+ {
772
+ "epoch": 0.01039512065765049,
773
+ "grad_norm": 0.7945475578308105,
774
+ "learning_rate": 4.995015419539983e-07,
775
+ "loss": 1.608,
776
+ "step": 98
777
+ },
778
+ {
779
+ "epoch": 0.010501193317422435,
780
+ "grad_norm": 0.7080848813056946,
781
+ "learning_rate": 4.994964071150901e-07,
782
+ "loss": 1.6869,
783
+ "step": 99
784
+ },
785
+ {
786
+ "epoch": 0.010607265977194379,
787
+ "grad_norm": 0.6934227347373962,
788
+ "learning_rate": 4.994912712862803e-07,
789
+ "loss": 1.4698,
790
+ "step": 100
791
+ },
792
+ {
793
+ "epoch": 0.010607265977194379,
794
+ "eval_loss": 1.4758180379867554,
795
+ "eval_runtime": 67.7252,
796
+ "eval_samples_per_second": 1.89,
797
+ "eval_steps_per_second": 0.472,
798
+ "step": 100
799
+ },
800
+ {
801
+ "epoch": 0.010713338636966323,
802
+ "grad_norm": 0.6977412700653076,
803
+ "learning_rate": 4.994861344672828e-07,
804
+ "loss": 1.6276,
805
+ "step": 101
806
+ },
807
+ {
808
+ "epoch": 0.010819411296738265,
809
+ "grad_norm": 0.8750130534172058,
810
+ "learning_rate": 4.994809966578113e-07,
811
+ "loss": 1.5425,
812
+ "step": 102
813
+ },
814
+ {
815
+ "epoch": 0.01092548395651021,
816
+ "grad_norm": 1.6952922344207764,
817
+ "learning_rate": 4.99475857857579e-07,
818
+ "loss": 1.6159,
819
+ "step": 103
820
+ },
821
+ {
822
+ "epoch": 0.011031556616282153,
823
+ "grad_norm": 0.7001510858535767,
824
+ "learning_rate": 4.994707180662995e-07,
825
+ "loss": 1.5937,
826
+ "step": 104
827
+ },
828
+ {
829
+ "epoch": 0.011137629276054098,
830
+ "grad_norm": 0.7474836707115173,
831
+ "learning_rate": 4.99465577283686e-07,
832
+ "loss": 1.4775,
833
+ "step": 105
834
+ },
835
+ {
836
+ "epoch": 0.011243701935826042,
837
+ "grad_norm": 1.114769458770752,
838
+ "learning_rate": 4.994604355094518e-07,
839
+ "loss": 1.4304,
840
+ "step": 106
841
+ },
842
+ {
843
+ "epoch": 0.011349774595597984,
844
+ "grad_norm": 0.7222145199775696,
845
+ "learning_rate": 4.994552927433097e-07,
846
+ "loss": 1.2972,
847
+ "step": 107
848
+ },
849
+ {
850
+ "epoch": 0.011455847255369928,
851
+ "grad_norm": 0.7787733674049377,
852
+ "learning_rate": 4.994501489849728e-07,
853
+ "loss": 1.8544,
854
+ "step": 108
855
+ },
856
+ {
857
+ "epoch": 0.011561919915141872,
858
+ "grad_norm": 0.6402618288993835,
859
+ "learning_rate": 4.994450042341541e-07,
860
+ "loss": 1.5189,
861
+ "step": 109
862
+ },
863
+ {
864
+ "epoch": 0.011667992574913817,
865
+ "grad_norm": 0.6818183064460754,
866
+ "learning_rate": 4.99439858490566e-07,
867
+ "loss": 1.3971,
868
+ "step": 110
869
+ },
870
+ {
871
+ "epoch": 0.011667992574913817,
872
+ "eval_loss": 1.4671616554260254,
873
+ "eval_runtime": 68.8225,
874
+ "eval_samples_per_second": 1.86,
875
+ "eval_steps_per_second": 0.465,
876
+ "step": 110
877
+ },
878
+ {
879
+ "epoch": 0.011774065234685759,
880
+ "grad_norm": 0.8468677997589111,
881
+ "learning_rate": 4.994347117539214e-07,
882
+ "loss": 1.6674,
883
+ "step": 111
884
+ },
885
+ {
886
+ "epoch": 0.011880137894457703,
887
+ "grad_norm": 0.9757254123687744,
888
+ "learning_rate": 4.994295640239325e-07,
889
+ "loss": 1.5847,
890
+ "step": 112
891
+ },
892
+ {
893
+ "epoch": 0.011986210554229647,
894
+ "grad_norm": 0.9481499195098877,
895
+ "learning_rate": 4.99424415300312e-07,
896
+ "loss": 1.3539,
897
+ "step": 113
898
+ },
899
+ {
900
+ "epoch": 0.012092283214001591,
901
+ "grad_norm": 0.6789958477020264,
902
+ "learning_rate": 4.99419265582772e-07,
903
+ "loss": 1.5099,
904
+ "step": 114
905
+ },
906
+ {
907
+ "epoch": 0.012198355873773535,
908
+ "grad_norm": 0.6501567959785461,
909
+ "learning_rate": 4.994141148710247e-07,
910
+ "loss": 1.5429,
911
+ "step": 115
912
+ },
913
+ {
914
+ "epoch": 0.012304428533545478,
915
+ "grad_norm": 1.262799859046936,
916
+ "learning_rate": 4.994089631647824e-07,
917
+ "loss": 1.1193,
918
+ "step": 116
919
+ },
920
+ {
921
+ "epoch": 0.012410501193317422,
922
+ "grad_norm": 0.685874342918396,
923
+ "learning_rate": 4.994038104637567e-07,
924
+ "loss": 1.728,
925
+ "step": 117
926
+ },
927
+ {
928
+ "epoch": 0.012516573853089366,
929
+ "grad_norm": 0.7375260591506958,
930
+ "learning_rate": 4.993986567676594e-07,
931
+ "loss": 1.6958,
932
+ "step": 118
933
+ },
934
+ {
935
+ "epoch": 0.01262264651286131,
936
+ "grad_norm": 0.7215054631233215,
937
+ "learning_rate": 4.993935020762025e-07,
938
+ "loss": 1.3697,
939
+ "step": 119
940
+ },
941
+ {
942
+ "epoch": 0.012728719172633254,
943
+ "grad_norm": 0.7148920297622681,
944
+ "learning_rate": 4.993883463890975e-07,
945
+ "loss": 1.6451,
946
+ "step": 120
947
+ },
948
+ {
949
+ "epoch": 0.012728719172633254,
950
+ "eval_loss": 1.459830403327942,
951
+ "eval_runtime": 68.6763,
952
+ "eval_samples_per_second": 1.864,
953
+ "eval_steps_per_second": 0.466,
954
+ "step": 120
955
+ },
956
+ {
957
+ "epoch": 0.012834791832405197,
958
+ "grad_norm": 0.6565997004508972,
959
+ "learning_rate": 4.993831897060559e-07,
960
+ "loss": 1.4405,
961
+ "step": 121
962
+ },
963
+ {
964
+ "epoch": 0.012940864492177141,
965
+ "grad_norm": 0.7842444181442261,
966
+ "learning_rate": 4.993780320267891e-07,
967
+ "loss": 1.659,
968
+ "step": 122
969
+ },
970
+ {
971
+ "epoch": 0.013046937151949085,
972
+ "grad_norm": 0.7806555032730103,
973
+ "learning_rate": 4.993728733510084e-07,
974
+ "loss": 1.6056,
975
+ "step": 123
976
+ },
977
+ {
978
+ "epoch": 0.01315300981172103,
979
+ "grad_norm": 0.8176814317703247,
980
+ "learning_rate": 4.993677136784249e-07,
981
+ "loss": 1.6902,
982
+ "step": 124
983
+ },
984
+ {
985
+ "epoch": 0.013259082471492973,
986
+ "grad_norm": 0.7916125059127808,
987
+ "learning_rate": 4.993625530087498e-07,
988
+ "loss": 1.4324,
989
+ "step": 125
990
+ },
991
+ {
992
+ "epoch": 0.013365155131264916,
993
+ "grad_norm": 0.6581283211708069,
994
+ "learning_rate": 4.993573913416939e-07,
995
+ "loss": 1.753,
996
+ "step": 126
997
+ },
998
+ {
999
+ "epoch": 0.01347122779103686,
1000
+ "grad_norm": 0.6454209685325623,
1001
+ "learning_rate": 4.99352228676968e-07,
1002
+ "loss": 1.5184,
1003
+ "step": 127
1004
+ },
1005
+ {
1006
+ "epoch": 0.013577300450808804,
1007
+ "grad_norm": 0.746411919593811,
1008
+ "learning_rate": 4.99347065014283e-07,
1009
+ "loss": 1.5564,
1010
+ "step": 128
1011
+ },
1012
+ {
1013
+ "epoch": 0.013683373110580748,
1014
+ "grad_norm": 0.710649847984314,
1015
+ "learning_rate": 4.993419003533493e-07,
1016
+ "loss": 1.46,
1017
+ "step": 129
1018
+ },
1019
+ {
1020
+ "epoch": 0.013789445770352692,
1021
+ "grad_norm": 0.7483745217323303,
1022
+ "learning_rate": 4.993367346938775e-07,
1023
+ "loss": 1.4491,
1024
+ "step": 130
1025
+ },
1026
+ {
1027
+ "epoch": 0.013789445770352692,
1028
+ "eval_loss": 1.4538627862930298,
1029
+ "eval_runtime": 68.3516,
1030
+ "eval_samples_per_second": 1.873,
1031
+ "eval_steps_per_second": 0.468,
1032
+ "step": 130
1033
+ },
1034
+ {
1035
+ "epoch": 0.013895518430124635,
1036
+ "grad_norm": 0.7695822715759277,
1037
+ "learning_rate": 4.993315680355781e-07,
1038
+ "loss": 1.5002,
1039
+ "step": 131
1040
+ },
1041
+ {
1042
+ "epoch": 0.014001591089896579,
1043
+ "grad_norm": 0.7457824945449829,
1044
+ "learning_rate": 4.993264003781611e-07,
1045
+ "loss": 1.4464,
1046
+ "step": 132
1047
+ },
1048
+ {
1049
+ "epoch": 0.014107663749668523,
1050
+ "grad_norm": 0.6966177821159363,
1051
+ "learning_rate": 4.99321231721337e-07,
1052
+ "loss": 1.5493,
1053
+ "step": 133
1054
+ },
1055
+ {
1056
+ "epoch": 0.014213736409440467,
1057
+ "grad_norm": 0.7250157594680786,
1058
+ "learning_rate": 4.993160620648156e-07,
1059
+ "loss": 1.6838,
1060
+ "step": 134
1061
+ },
1062
+ {
1063
+ "epoch": 0.014319809069212411,
1064
+ "grad_norm": 0.6885997653007507,
1065
+ "learning_rate": 4.993108914083069e-07,
1066
+ "loss": 1.3327,
1067
+ "step": 135
1068
+ },
1069
+ {
1070
+ "epoch": 0.014425881728984354,
1071
+ "grad_norm": 0.6850383877754211,
1072
+ "learning_rate": 4.993057197515208e-07,
1073
+ "loss": 1.373,
1074
+ "step": 136
1075
+ },
1076
+ {
1077
+ "epoch": 0.014531954388756298,
1078
+ "grad_norm": 0.8969720005989075,
1079
+ "learning_rate": 4.993005470941668e-07,
1080
+ "loss": 1.6318,
1081
+ "step": 137
1082
+ },
1083
+ {
1084
+ "epoch": 0.014638027048528242,
1085
+ "grad_norm": 0.6809049844741821,
1086
+ "learning_rate": 4.992953734359548e-07,
1087
+ "loss": 1.3728,
1088
+ "step": 138
1089
+ },
1090
+ {
1091
+ "epoch": 0.014744099708300186,
1092
+ "grad_norm": 0.6609801054000854,
1093
+ "learning_rate": 4.992901987765941e-07,
1094
+ "loss": 1.4302,
1095
+ "step": 139
1096
+ },
1097
+ {
1098
+ "epoch": 0.01485017236807213,
1099
+ "grad_norm": 0.6714500188827515,
1100
+ "learning_rate": 4.99285023115794e-07,
1101
+ "loss": 1.3048,
1102
+ "step": 140
1103
+ },
1104
+ {
1105
+ "epoch": 0.01485017236807213,
1106
+ "eval_loss": 1.4487165212631226,
1107
+ "eval_runtime": 68.3673,
1108
+ "eval_samples_per_second": 1.872,
1109
+ "eval_steps_per_second": 0.468,
1110
+ "step": 140
1111
+ },
1112
+ {
1113
+ "epoch": 0.014956245027844072,
1114
+ "grad_norm": 0.7616133689880371,
1115
+ "learning_rate": 4.992798464532639e-07,
1116
+ "loss": 1.6308,
1117
+ "step": 141
1118
+ },
1119
+ {
1120
+ "epoch": 0.015062317687616017,
1121
+ "grad_norm": 0.7023948431015015,
1122
+ "learning_rate": 4.99274668788713e-07,
1123
+ "loss": 1.4595,
1124
+ "step": 142
1125
+ },
1126
+ {
1127
+ "epoch": 0.01516839034738796,
1128
+ "grad_norm": 1.1847342252731323,
1129
+ "learning_rate": 4.992694901218502e-07,
1130
+ "loss": 1.892,
1131
+ "step": 143
1132
+ },
1133
+ {
1134
+ "epoch": 0.015274463007159905,
1135
+ "grad_norm": 0.6449259519577026,
1136
+ "learning_rate": 4.992643104523846e-07,
1137
+ "loss": 1.4526,
1138
+ "step": 144
1139
+ },
1140
+ {
1141
+ "epoch": 0.015380535666931849,
1142
+ "grad_norm": 0.8200846910476685,
1143
+ "learning_rate": 4.992591297800247e-07,
1144
+ "loss": 1.6277,
1145
+ "step": 145
1146
+ },
1147
+ {
1148
+ "epoch": 0.015486608326703791,
1149
+ "grad_norm": 0.627193808555603,
1150
+ "learning_rate": 4.992539481044796e-07,
1151
+ "loss": 1.2767,
1152
+ "step": 146
1153
+ },
1154
+ {
1155
+ "epoch": 0.015592680986475736,
1156
+ "grad_norm": 0.6754332780838013,
1157
+ "learning_rate": 4.992487654254575e-07,
1158
+ "loss": 1.7414,
1159
+ "step": 147
1160
+ },
1161
+ {
1162
+ "epoch": 0.01569875364624768,
1163
+ "grad_norm": 0.8303399682044983,
1164
+ "learning_rate": 4.992435817426671e-07,
1165
+ "loss": 1.5313,
1166
+ "step": 148
1167
+ },
1168
+ {
1169
+ "epoch": 0.015804826306019624,
1170
+ "grad_norm": 0.7653703093528748,
1171
+ "learning_rate": 4.992383970558168e-07,
1172
+ "loss": 1.6444,
1173
+ "step": 149
1174
+ },
1175
+ {
1176
+ "epoch": 0.015910898965791568,
1177
+ "grad_norm": 0.756195068359375,
1178
+ "learning_rate": 4.992332113646148e-07,
1179
+ "loss": 1.5063,
1180
+ "step": 150
1181
+ },
1182
+ {
1183
+ "epoch": 0.015910898965791568,
1184
+ "eval_loss": 1.4446443319320679,
1185
+ "eval_runtime": 67.8607,
1186
+ "eval_samples_per_second": 1.886,
1187
+ "eval_steps_per_second": 0.472,
1188
+ "step": 150
1189
  }
1190
  ],
1191
  "logging_steps": 1,
 
1205
  "attributes": {}
1206
  }
1207
  },
1208
+ "total_flos": 2.418837751332864e+17,
1209
  "train_batch_size": 4,
1210
  "trial_name": null,
1211
  "trial_params": null